refactor: use String.split instead of String.splitOn or String.splitToList (#11250)

This PR introduces a function `String.split` which is based on `String.Slice.split` and therefore supports all pattern types and returns a `Std.Iter String.Slice`. This supersedes the functions `String.splitOn` and `String.splitToList`, and we remove all all uses of these functions from core. They will be deprecated in a future PR. Migrating from `String.splitOn` and `String.splitToList` is easy: we introduce functions `Iter.toStringList` and `Iter.toStringArray` that can be used to conveniently go from `Std.Iter String.Slice` to `List String` and `Array String`, so for example `s.splitOn "foo"` can be replaced by `s.split "foo" |>.toStringList`.
2025-11-19 10:35:19 +01:00 · 2025-11-19 10:35:19 +01:00 · 52d05b6972
commit 52d05b6972
parent f7031c7aa9
36 changed files with 254 additions and 180 deletions
--- a/src/Init/Data/Format/Instances.lean
+++ b/src/Init/Data/Format/Instances.lean
@ -7,7 +7,7 @@ module

 prelude
 public import Init.Data.Array.Basic
-import Init.Data.String.Basic
+import Init.Data.String.Search

 public section

@ -47,7 +47,7 @@ Converts a string to a pretty-printer document, replacing newlines in the string
 `Std.Format.line`.
 -/
 def String.toFormat (s : String) : Std.Format :=
-  Std.Format.joinSep (s.splitOn "\n") Std.Format.line
+  Std.Format.joinSep (s.split '\n').toList Std.Format.line

 instance : ToFormat String.Pos.Raw where
  format p := format p.byteIdx
--- a/src/Init/Data/String.lean
+++ b/src/Init/Data/String.lean
@ -25,3 +25,4 @@ public import Init.Data.String.Modify
 public import Init.Data.String.Termination
 public import Init.Data.String.ToSlice
 public import Init.Data.String.Search
+public import Init.Data.String.Legacy
--- a/src/Init/Data/String/Basic.lean
+++ b/src/Init/Data/String/Basic.lean
@ -1,7 +1,7 @@
 /-
 Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 Released under Apache 2.0 license as described in the file LICENSE.
-Author: Leonardo de Moura, Mario Carneiro
+Author: Leonardo de Moura, Mario Carneiro, Markus Himmel
 -/
 module

@ -2479,98 +2479,6 @@ where
 def extract : (@& String) → (@& Pos.Raw) → (@& Pos.Raw) → String
  | s, b, e => Pos.Raw.extract s b e

-@[specialize] def splitAux (s : String) (p : Char → Bool) (b : Pos.Raw) (i : Pos.Raw) (r : List String) : List String :=
-  if h : i.atEnd s then
-    let r := (b.extract s i)::r
-    r.reverse
-  else
-    have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (Pos.Raw.lt_next s _)
-    if p (i.get s) then
-      let i' := i.next s
-      splitAux s p i' i' (b.extract s i :: r)
-    else
-      splitAux s p b (i.next s) r
-termination_by s.rawEndPos.1 - i.1
-
-/--
-Splits a string at each character for which `p` returns `true`.
-
-The characters that satisfy `p` are not included in any of the resulting strings. If multiple
-characters in a row satisfy `p`, then the resulting list will contain empty strings.
-
-Examples:
-* `"coffee tea water".split (·.isWhitespace) = ["coffee", "tea", "water"]`
-* `"coffee  tea  water".split (·.isWhitespace) = ["coffee", "", "tea", "", "water"]`
-* `"fun x =>\n  x + 1\n".split (· == '\n') = ["fun x =>", "  x + 1", ""]`
-/
-@[inline] def splitToList (s : String) (p : Char → Bool) : List String :=
-  splitAux s p 0 0 []
-
-@[inline, deprecated splitToList (since := "2025-10-17")]
-def split (s : String) (p : Char → Bool) : List String :=
-  splitToList s p
-
-/--
-Auxiliary for `splitOn`. Preconditions:
-* `sep` is not empty
-* `b <= i` are indexes into `s`
-* `j` is an index into `sep`, and not at the end
-
-It represents the state where we have currently parsed some split parts into `r` (in reverse order),
-`b` is the beginning of the string / the end of the previous match of `sep`, and the first `j` bytes
-of `sep` match the bytes `i-j .. i` of `s`.
-/
-def splitOnAux (s sep : String) (b : Pos.Raw) (i : Pos.Raw) (j : Pos.Raw) (r : List String) : List String :=
-  if i.atEnd s then
-    let r := (b.extract s i)::r
-    r.reverse
-  else
-    if i.get s == j.get sep then
-      let i := i.next s
-      let j := j.next sep
-      if j.atEnd sep then
-        splitOnAux s sep i i 0 (b.extract s (i.unoffsetBy j)::r)
-      else
-        splitOnAux s sep b i j r
-    else
-      splitOnAux s sep b ((i.unoffsetBy j).next s) 0 r
-termination_by (s.rawEndPos.1 - (j.byteDistance i), sep.rawEndPos.1 - j.1)
-decreasing_by
-  focus
-    rename_i h _ _
-    left; exact Nat.sub_lt_sub_left
-      (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h)))
-      (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Pos.Raw.lt_next s _))
-  focus
-    rename_i i₀ j₀ _ eq h'
-    rw [show (j₀.next sep).byteDistance (i₀.next s) = j₀.byteDistance i₀ by
-      change (_ + Char.utf8Size _) - (_ + Char.utf8Size _) = _
-      rw [(beq_iff_eq ..).1 eq, Nat.add_sub_add_right]; rfl]
-    right; exact Nat.sub_lt_sub_left
-      (Nat.lt_of_le_of_lt (Nat.le_add_right ..) (Nat.gt_of_not_le (mt decide_eq_true h')))
-      (Pos.Raw.lt_next sep _)
-  focus
-    rename_i h _
-    left; exact Nat.sub_lt_sub_left
-      (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h)))
-      (Pos.Raw.lt_next s _)
-
-/--
-Splits a string `s` on occurrences of the separator string `sep`. The default separator is `" "`.
-
-When `sep` is empty, the result is `[s]`. When `sep` occurs in overlapping patterns, the first match
-is taken. There will always be exactly `n+1` elements in the returned list if there were `n`
-non-overlapping matches of `sep` in the string. The separators are not included in the returned
-substrings.
-
-Examples:
-* `"here is some text ".splitOn = ["here", "is", "some", "text", ""]`
-* `"here is some text ".splitOn "some" = ["here is ", " text "]`
-* `"here is some text ".splitOn "" = ["here is some text "]`
-* `"ababacabac".splitOn "aba" = ["", "bac", "c"]`
-/
-@[inline] def splitOn (s : String) (sep : String := " ") : List String :=
-  if sep == "" then [s] else splitOnAux s sep 0 0 0 []


 def Pos.Raw.offsetOfPosAux (s : String) (pos : Pos.Raw) (i : Pos.Raw) (offset : Nat) : Nat :=
--- a/src/Init/Data/String/Legacy.lean
+++ b/src/Init/Data/String/Legacy.lean
@ -0,0 +1,115 @@
+/-
+Copyright (c) 2016 Microsoft Corporation. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Author: Leonardo de Moura, Mario Carneiro
+-/
+module
+
+prelude
+public import Init.Data.String.Basic
+
+/-!
+# Legacy string functions
+
+This file contains `String` functions which have since been replaced by different functions and
+will be deprecated in the future.
+-/
+
+public section
+
+namespace String
+
+@[specialize] def splitAux (s : String) (p : Char → Bool) (b : Pos.Raw) (i : Pos.Raw) (r : List String) : List String :=
+  if h : i.atEnd s then
+    let r := (b.extract s i)::r
+    r.reverse
+  else
+    have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (Pos.Raw.lt_next s _)
+    if p (i.get s) then
+      let i' := i.next s
+      splitAux s p i' i' (b.extract s i :: r)
+    else
+      splitAux s p b (i.next s) r
+termination_by s.rawEndPos.1 - i.1
+
+/--
+Splits a string at each character for which `p` returns `true`.
+
+The characters that satisfy `p` are not included in any of the resulting strings. If multiple
+characters in a row satisfy `p`, then the resulting list will contain empty strings.
+
+This is a legacy function. Use `String.split` instead.
+
+Examples:
+* `"coffee tea water".split (·.isWhitespace) = ["coffee", "tea", "water"]`
+* `"coffee  tea  water".split (·.isWhitespace) = ["coffee", "", "tea", "", "water"]`
+* `"fun x =>\n  x + 1\n".split (· == '\n') = ["fun x =>", "  x + 1", ""]`
+-/
+@[inline] def splitToList (s : String) (p : Char → Bool) : List String :=
+  splitAux s p 0 0 []
+
+/--
+Auxiliary for `splitOn`. Preconditions:
+* `sep` is not empty
+* `b <= i` are indexes into `s`
+* `j` is an index into `sep`, and not at the end
+
+It represents the state where we have currently parsed some split parts into `r` (in reverse order),
+`b` is the beginning of the string / the end of the previous match of `sep`, and the first `j` bytes
+of `sep` match the bytes `i-j .. i` of `s`.
+-/
+def splitOnAux (s sep : String) (b : Pos.Raw) (i : Pos.Raw) (j : Pos.Raw) (r : List String) : List String :=
+  if i.atEnd s then
+    let r := (b.extract s i)::r
+    r.reverse
+  else
+    if i.get s == j.get sep then
+      let i := i.next s
+      let j := j.next sep
+      if j.atEnd sep then
+        splitOnAux s sep i i 0 (b.extract s (i.unoffsetBy j)::r)
+      else
+        splitOnAux s sep b i j r
+    else
+      splitOnAux s sep b ((i.unoffsetBy j).next s) 0 r
+termination_by (s.rawEndPos.1 - (j.byteDistance i), sep.rawEndPos.1 - j.1)
+decreasing_by
+  focus
+    rename_i h _ _
+    left; exact Nat.sub_lt_sub_left
+      (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h)))
+      (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Pos.Raw.lt_next s _))
+  focus
+    rename_i i₀ j₀ _ eq h'
+    rw [show (j₀.next sep).byteDistance (i₀.next s) = j₀.byteDistance i₀ by
+      change (_ + Char.utf8Size _) - (_ + Char.utf8Size _) = _
+      rw [(beq_iff_eq ..).1 eq, Nat.add_sub_add_right]; rfl]
+    right; exact Nat.sub_lt_sub_left
+      (Nat.lt_of_le_of_lt (Nat.le_add_right ..) (Nat.gt_of_not_le (mt decide_eq_true h')))
+      (Pos.Raw.lt_next sep _)
+  focus
+    rename_i h _
+    left; exact Nat.sub_lt_sub_left
+      (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h)))
+      (Pos.Raw.lt_next s _)
+
+/--
+Splits a string `s` on occurrences of the separator string `sep`. The default separator is `" "`.
+
+When `sep` is empty, the result is `[s]`. When `sep` occurs in overlapping patterns, the first match
+is taken. There will always be exactly `n+1` elements in the returned list if there were `n`
+non-overlapping matches of `sep` in the string. The separators are not included in the returned
+substrings.
+
+This is a legacy function. Use `String.split` instead.
+
+Examples:
+* `"here is some text ".splitOn = ["here", "is", "some", "text", ""]`
+* `"here is some text ".splitOn "some" = ["here is ", " text "]`
+* `"here is some text ".splitOn "" = ["here is some text "]`
+* `"ababacabac".splitOn "aba" = ["", "bac", "c"]`
+-/
+@[inline] def splitOn (s : String) (sep : String := " ") : List String :=
+  if sep == "" then [s] else splitOnAux s sep 0 0 0 []
+
+end String
--- a/src/Init/Data/String/Search.lean
+++ b/src/Init/Data/String/Search.lean
@ -96,6 +96,25 @@ Examples:
 def find? [ToForwardSearcher ρ σ] (s : String) (pattern : ρ) : Option s.ValidPos :=
  s.startValidPos.find? pattern

+/--
+Splits a string at each subslice that matches the pattern {name}`pat`.
+
+The subslices that matched the pattern are not included in any of the resulting subslices. If
+multiple subslices in a row match the pattern, the resulting list will contain empty strings.
+
+This function is generic over all currently supported patterns.
+
+Examples:
+ * {lean}`("coffee tea water".split Char.isWhitespace).toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]`
+ * {lean}`("coffee tea water".split ' ').toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]`
+ * {lean}`("coffee tea water".split " tea ").toList == ["coffee".toSlice, "water".toSlice]`
+ * {lean}`("ababababa".split "aba").toList == ["coffee".toSlice, "water".toSlice]`
+ * {lean}`("baaab".split "aa").toList == ["b".toSlice, "ab".toSlice]`
+-/
+@[inline]
+def split [ToForwardSearcher ρ σ] (s : String) (pat : ρ) :=
+  (s.toSlice.split pat : Std.Iter String.Slice)
+
 end

 end String
--- a/src/Init/Data/String/Slice.lean
+++ b/src/Init/Data/String/Slice.lean
@ -1,7 +1,7 @@
 /-
 Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
 Released under Apache 2.0 license as described in the file LICENSE.
-Authors: Henrik Böving
+Authors: Henrik Böving, Markus Himmel
 -/
 module

@ -795,9 +795,8 @@ This function is generic over all currently supported patterns except
 {name}`String`/{name}`String.Slice`.

 Examples:
- * {lean}`("coffee tea water".toSlice.find? Char.isWhitespace).map (·.get!) == some ' '`
- * {lean}`"tea".toSlice.find? (fun (c : Char) => c == 'X') == none`
- * {lean}`("coffee tea water".toSlice.find? "tea").map (·.get!) == some 't'`
+ * {lean}`("coffee tea water".toSlice.revFind? Char.isWhitespace).map (·.get!) == some ' '`
+ * {lean}`"tea".toSlice.revFind? (fun (c : Char) => c == 'X') == none`
 -/
@[specialize pat]
 def revFind? [ToBackwardSearcher ρ σ] (s : Slice) (pat : ρ) : Option s.Pos :=
@ -1371,4 +1370,20 @@ hierarchical, and the string is split at the dots ({lean}`'.'`).
 def toName (s : Slice) : Lean.Name :=
  s.toString.toName

+instance : Std.ToFormat String.Slice where
+  format s := Std.ToFormat.format s.copy
+
 end String.Slice
+
+/-- Converts a {lean}`Std.Iter String.Slice` to a {lean}`List String`. -/
+@[inline]
+def Std.Iterators.Iter.toStringList {α : Type} [Std.Iterators.Iterator α Id String.Slice]
+    [Std.Iterators.Finite α Id] [Std.Iterators.IteratorCollect α Id Id]
+    (i : Std.Iter (α := α) String.Slice) : List String :=
+  i.map String.Slice.copy |>.toList
+
+/-- Converts a {lean}`Std.Iter String.Slice` to an {lean}`Array String`. -/
+def Std.Iterators.Iter.toStringArray {α : Type} [Std.Iterators.Iterator α Id String.Slice]
+    [Std.Iterators.Finite α Id] [Std.Iterators.IteratorCollect α Id Id]
+    (i : Std.Iter (α := α) String.Slice) : Array String :=
+  i.map String.Slice.copy |>.toArray
--- a/src/Init/System/FilePath.lean
+++ b/src/Init/System/FilePath.lean
@ -8,6 +8,7 @@ module
 prelude
 public import Init.Data.String.Basic
 import Init.Data.String.Modify
+import Init.Data.String.Search

 public section

@ -242,7 +243,7 @@ def withExtension (p : FilePath) (ext : String) : FilePath :=
 Splits a path into a list of individual file names at the platform-specific path separator.
 -/
 def components (p : FilePath) : List String :=
-  p.normalize |>.toString.splitOn pathSeparator.toString
+  p.normalize |>.toString.split pathSeparator.toString |>.toStringList

 end FilePath

@ -273,7 +274,7 @@ Separates the entries in the `$PATH` (or `%PATH%`) environment variable by the c
 platform-dependent separator character.
 -/
 def parse (s : String) : SearchPath :=
-  s.splitToList (fun c => SearchPath.separator == c) |>.map FilePath.mk
+  s.split SearchPath.separator |>.map (FilePath.mk ∘ String.Slice.copy) |>.toList

 /--
 Joins a list of paths into a suitable value for the current platform's `$PATH` (or `%PATH%`)
--- a/src/Lean/Compiler/FFI.lean
+++ b/src/Lean/Compiler/FFI.lean
@ -19,7 +19,7 @@ namespace Lean.Compiler.FFI
 private opaque getLeancExtraFlags : Unit → String

 private def flagsStringToArray (s : String) : Array String :=
-  s.splitOn.toArray |>.filter (· ≠ "")
+  s.split ' ' |>.filter (!·.isEmpty) |>.toStringArray

 /--
 Return C compiler flags for including Lean's headers.
--- a/src/Lean/Data/Lsp/Communication.lean
+++ b/src/Lean/Data/Lsp/Communication.lean
@ -9,6 +9,7 @@ module
 prelude
 public import Lean.Data.JsonRpc
 import Init.Data.String.TakeDrop
+import Init.Data.String.Search

 public section

--- a/src/Lean/DocString/Formatter.lean
+++ b/src/Lean/DocString/Formatter.lean
@ -154,8 +154,10 @@ partial def versoSyntaxToString' (stx : Syntax) : ReaderT Nat (StateM String) Un
      out "\n"
      let i ← read
      let s := Syntax.decodeStrLit (atomString s) |>.getD ""
-        |>.splitToList (· == '\n')
-        |>.map ("".pushn ' ' i ++ · ) |> "\n".intercalate
+        |>.split '\n'
+        |>.map (fun (s : String.Slice) => "".pushn ' ' i ++ s)
+        |>.toList
+        |> "\n".intercalate
      out s
      out <| "".pushn ' ' i
      out <| atomString tk2
--- a/src/Lean/DocString/Links.lean
+++ b/src/Lean/DocString/Links.lean
@ -9,6 +9,7 @@ module
 prelude
 public import Lean.Syntax
 import Init.Data.String.TakeDrop
+import Init.Data.String.Search

 public section

@ -70,7 +71,7 @@ def manualLink (kind name : String) : Except String String :=
    throw s!"Unknown documentation type `{kind}`. Expected one of the following: {acceptableKinds}"

 private def rw (path : String) : Except String String := do
-  match path.splitOn "/" with
+  match path.split '/' |>.toStringList with
  | [] | [""] =>
    throw "Missing documentation type"
  | kind :: args =>
--- a/src/Lean/DocString/Parser.lean
+++ b/src/Lean/DocString/Parser.lean
@ -1049,7 +1049,7 @@ mutual
    deIndent (n : Nat) (str : String) : String := Id.run do
      let str := if str != "" && str.back == '\n' then str.dropEnd 1 |>.copy else str
      let mut out := ""
-      for line in str.splitOn "\n" do
+      for line in str.split '\n' do
        out := out ++ line.drop n ++ "\n"
      out

--- a/src/Lean/Elab/DocString.lean
+++ b/src/Lean/Elab/DocString.lean
@ -1207,9 +1207,9 @@ private def mkSuggestion
    let pre := String.Pos.Raw.extract text.source 0 b
    let post := String.Pos.Raw.extract text.source e text.source.rawEndPos
    let edits := newStrings.map fun (s, _, _) =>
-      let lines := text.source.splitToList (· == '\n') |>.toArray
+      let lines := text.source.split '\n' |>.toStringArray
      let s' := pre ++ s ++ post
-      let lines' := s'.splitToList (· == '\n') |>.toArray
+      let lines' := s'.split '\n' |>.toStringArray
      let d := diff lines lines'
      toMessageData <| Diff.linesToString <| d.filter (·.1 != Action.skip)
    pure m!"\n\nHint: {hintTitle}\n{indentD <| m!"\n".joinSep edits.toList}"
--- a/src/Lean/Elab/DocString/Builtin/Keywords.lean
+++ b/src/Lean/Elab/DocString/Builtin/Keywords.lean
@ -330,7 +330,7 @@ private def withAtoms (cat : Name) (atoms : List String) : TermElabM (Array Name
 private def kwImpl (cat : Ident := mkIdent .anonymous) (of : Ident := mkIdent .anonymous)
    (suggest : Bool)
    (s : StrLit) : TermElabM (Inline ElabInline) := do
-  let atoms := s.getString |>.splitToList (·.isWhitespace)
+  let atoms := s.getString |>.split Char.isWhitespace |>.toStringList
  let env ← getEnv
  let parsers := Lean.Parser.parserExtension.getState env
  let cat' := cat.getId
@ -495,7 +495,7 @@ Suggests the `kw` role, if applicable.
 -/
@[builtin_doc_code_suggestions]
 public def suggestKw (code : StrLit) : DocM (Array CodeSuggestion) := do
-  let atoms := code.getString |>.splitToList (·.isWhitespace)
+  let atoms := code.getString |>.split Char.isWhitespace |>.toStringList
  let env ← getEnv
  let parsers := Lean.Parser.parserExtension.getState env
  let cats := parsers.categories.toArray
--- a/src/Lean/Elab/GuardMsgs.lean
+++ b/src/Lean/Elab/GuardMsgs.lean
@ -166,7 +166,7 @@ def WhitespaceMode.apply (mode : WhitespaceMode) (s : String) : String :=
  match mode with
  | .exact => s
  | .normalized => s.replace "\n" " "
-  | .lax => String.intercalate " " <| (s.splitToList Char.isWhitespace).filter (!·.isEmpty)
+  | .lax => " ".toSlice.intercalate <| (s.split Char.isWhitespace).filter (!·.isEmpty) |>.toList

 /--
 Applies a message ordering mode.
@ -215,7 +215,7 @@ def MessageOrdering.apply (mode : MessageOrdering) (msgs : List String) : List S
      modify fun st => { st with messages := msgs }
      let feedback :=
        if guard_msgs.diff.get (← getOptions) then
-          let diff := Diff.diff (expected.splitToList (· == '\n')).toArray (res.splitToList (· == '\n')).toArray
+          let diff := Diff.diff (expected.split '\n').toStringArray (res.split '\n').toStringArray
          Diff.linesToString diff
        else res
      logErrorAt tk m!"❌️ Docstring on `#guard_msgs` does not match generated message:\n\n{feedback}"
--- a/src/Lean/Elab/Tactic/Doc.lean
+++ b/src/Lean/Elab/Tactic/Doc.lean
@ -108,7 +108,7 @@ Displays all available tactic tags, with documentation.

  let showDocs : Option String → MessageData
    | none => .nil
-    | some d => Format.line ++ MessageData.joinSep ((d.splitOn "\n").map toMessageData) Format.line
+    | some d => Format.line ++ MessageData.joinSep ((d.split '\n').map (toMessageData ∘ String.Slice.copy)).toList Format.line

  let showTactics (tag : Name) : MetaM MessageData := do
    match mapping.find? tag with
--- a/src/Lean/Elab/Tactic/Try.lean
+++ b/src/Lean/Elab/Tactic/Try.lean
@ -338,9 +338,8 @@ private def expandUserTactic (tac : TSyntax `tactic) (goal : MVarId) : MetaM (Ar
        for msg in newMsgs do
          if msg.severity == MessageSeverity.information then
            let msgText ← msg.data.toString
-            for line in msgText.splitOn "\n" do
-              if line.startsWith "  [apply] " then
-                let tacticText := line.drop "  [apply] ".length
+            for line in msgText.split '\n' do
+              if let some tacticText := line.dropPrefix? "  [apply] " then
                let env ← getEnv
                if let .ok stx := Parser.runParserCategory env `tactic tacticText.copy then
                  suggestions := suggestions.push ⟨stx⟩
--- a/src/Lean/ErrorExplanation.lean
+++ b/src/Lean/ErrorExplanation.lean
@ -152,10 +152,10 @@ deriving Repr, Inhabited

 /-- Creates an iterator for validation from the raw contents of an error explanation. -/
 private def ValidationState.ofSource (input : String) : ValidationState where
-  lines := input.splitOn "\n"
+  lines := input.split '\n'
+    |>.filter (!·.trimAscii.isEmpty)
+    |>.toStringArray
    |>.zipIdx
-    |>.filter (!·.1.trimAscii.isEmpty)
-    |>.toArray

 -- Workaround to account for the fact that `Input` expects "EOF" to be a valid position
 private def ValidationState.get (s : ValidationState) :=
--- a/src/Lean/Message.lean
+++ b/src/Lean/Message.lean
@ -11,6 +11,7 @@ prelude
 public import Init.Data.Slice.Array
 public import Lean.Util.PPExt
 public import Lean.Util.Sorry
+import Init.Data.String.Search

 public section

@ -703,9 +704,9 @@ class ToMessageData (α : Type) where
 export ToMessageData (toMessageData)

 def stringToMessageData (str : String) : MessageData :=
-  let lines := str.splitToList (· == '\n')
+  let lines := str.split '\n'
  let lines := lines.map (MessageData.ofFormat ∘ format)
-  MessageData.joinSep lines (MessageData.ofFormat Format.line)
+  MessageData.joinSep lines.toList (MessageData.ofFormat Format.line)

 instance [ToFormat α] : ToMessageData α := ⟨MessageData.ofFormat ∘ format⟩
 instance : ToMessageData Expr          := ⟨MessageData.ofExpr⟩
--- a/src/Lean/Parser/Tactic/Doc.lean
+++ b/src/Lean/Parser/Tactic/Doc.lean
@ -262,10 +262,10 @@ def getTacticExtensionString (env : Environment) (tactic : Name) : String := Id.
  if exts.size == 0 then ""
  else "\n\nExtensions:\n\n" ++ String.join (exts.toList.map bullet) |>.trimAsciiEnd |>.copy
 where
-  indentLine (str: String) : String :=
-    (if str.all (·.isWhitespace) then str else "   " ++ str) ++ "\n"
+  indentLine (str : String.Slice) : String :=
+    (if str.all Char.isWhitespace then str.copy else "   " ++ str) ++ "\n"
  bullet (str : String) : String :=
-    let lines := str.splitOn "\n"
+    let lines := str.split '\n' |>.toList
    match lines with
    | [] => ""
    | [l] => " * " ++ l ++ "\n\n"
--- a/src/Lean/Parser/Term/Doc.lean
+++ b/src/Lean/Parser/Term/Doc.lean
@ -7,6 +7,7 @@ module

 prelude
 public import Lean.Parser.Extension
+import Init.Data.String.Search

 public section

@ -78,7 +79,7 @@ where
    (if str.all (·.isWhitespace) then str else "   " ++ str) ++ "\n"
  bullet (spelling : RecommendedSpelling) : String :=
    let firstLine := s!" * The recommended spelling of `{spelling.«notation»}` in identifiers is `{spelling.recommendedSpelling}`"
-    let additionalInfoLines := spelling.additionalInformation?.map (·.splitOn "\n")
+    let additionalInfoLines := spelling.additionalInformation?.map (·.split '\n' |>.toStringList)
    match additionalInfoLines with
    | none | some [] => firstLine ++ ".\n\n"
    | some [l] => firstLine ++ s!" ({l.trimAsciiEnd}).\n\n"
--- a/src/Lean/Server/Test/Runner.lean
+++ b/src/Lean/Server/Test/Runner.lean
@ -433,7 +433,7 @@ def processEdit : RunnerM Unit := do
    | "insert" => pure ("\"\"", s.params)
    | "change" =>
      -- TODO: allow spaces in strings
-      let [delete, insert] := s.params.splitOn " "
+      let [delete, insert] := s.params.split ' ' |>.toStringList
        | throw <| IO.userError s!"expected two arguments in {s.params}"
      pure (delete, insert)
    | _ => unreachable!
@ -670,7 +670,7 @@ def processDirective (ws directive : String) (directiveTargetLineNo : Nat) : Run
  | _ => processGenericRequest

 def processLine (line : String) : RunnerM Unit := do
-  let [ws, directive] := line.splitOn "--"
+  let [ws, directive] := line.split "--" |>.toStringList
    | skipLineWithoutDirective
      return
  let directiveTargetLineNo ←
@ -730,12 +730,12 @@ partial def main (args : List String) : IO Unit := do
      requestNo := 1
    }
    RunnerM.run (init := init) do
-      for text in text.splitOn "-- RESET" do
+      for text in text.split "-- RESET" do
        Ipc.writeNotification ⟨"textDocument/didOpen", {
-          textDocument := { uri := uri, languageId := "lean", version := 1, text := text } : DidOpenTextDocumentParams }⟩
+          textDocument := { uri := uri, languageId := "lean", version := 1, text := text.copy } : DidOpenTextDocumentParams }⟩
        reset
-        for line in text.splitOn "\n" do
-          processLine line
+        for line in text.split '\n' do
+          processLine line.copy
        let _ ← Ipc.collectDiagnostics (← get).requestNo uri ((← get).versionNo - 1)
        advanceRequestNo
        Ipc.writeNotification ⟨"textDocument/didClose", {
--- a/src/Lean/SubExpr.lean
+++ b/src/Lean/SubExpr.lean
@ -131,7 +131,7 @@ open Except in
 protected def fromString? : String → Except String Pos
  | "/" => Except.ok Pos.root
  | s =>
-    match String.splitOn s "/" with
+    match String.split s '/' |>.toStringList with
    | "" :: tail => Pos.ofArray <$> tail.toArray.mapM ofStringCoord
    | ss => error s!"malformed {ss}"

--- a/src/lake/Lake/Build/Actions.lean
+++ b/src/lake/Lake/Build/Actions.lean
@ -12,6 +12,7 @@ import Lake.Util.Proc
 import Lake.Util.NativeLib
 import Lake.Util.FilePath
 import Lake.Util.IO
+import Init.Data.String.Search

 /-! # Common Build Actions
 Low level actions to build common Lean artifacts via the Lean toolchain.
@ -56,7 +57,8 @@ public def compileLeanModule
    ]
  }
  unless out.stdout.isEmpty do
-    let txt ← out.stdout.splitToList (· == '\n') |>.foldlM (init := "") fun txt ln => do
+    let txt ← out.stdout.split '\n' |>.foldM (init := "") fun (txt : String) ln => do
+      let ln := ln.copy
      if let .ok (msg : SerialMessage) := Json.parse ln >>= fromJson? then
        unless txt.isEmpty do
          logInfo s!"stdout:\n{txt}"
--- a/src/lake/Lake/Build/Key.lean
+++ b/src/lake/Lake/Build/Key.lean
@ -10,6 +10,7 @@ public import Init.Data.Order
 import Lake.Util.Name
 import Lake.Config.Kinds
 import Init.Data.String.TakeDrop
+import Init.Data.String.Search

 namespace Lake
 open Lean (Name)
@ -52,7 +53,7 @@ Uses the same syntax as the `lake build` / `lake query` CLI.
 public def parse (s : String) : Except String PartialBuildKey := do
  if s.isEmpty then
    throw "ill-formed target: empty string"
-  match s.splitOn ":" with
+  match s.split ':' |>.toStringList with
  | target :: facets =>
    let target ← parseTarget target
    facets.foldlM (init := target) fun target facet => do
@ -65,7 +66,7 @@ public def parse (s : String) : Except String PartialBuildKey := do
    unreachable!
 where
  parseTarget s := do
-    match s.splitOn "/" with
+    match s.split '/' |>.toList with
    | [target] =>
      if target.isEmpty then
        return .package .anonymous
@ -80,11 +81,11 @@ where
      else
        parsePackageTarget .anonymous target
    | [pkg, target] =>
-      let pkg := if pkg.startsWith "@" then pkg.drop 1 |>.copy else pkg
+      let pkg := if pkg.startsWith "@" then pkg.drop 1 else pkg
      if pkg.isEmpty then
        parsePackageTarget .anonymous target
      else
-        parsePackageTarget (stringToLegalOrSimpleName pkg) target
+        parsePackageTarget (stringToLegalOrSimpleName pkg.copy) target
    | _ =>
      throw "ill-formed target: too many '/'"
  parsePackageTarget pkg target :=
@ -94,7 +95,7 @@ where
      let target := target.drop 1 |>.copy |> stringToLegalOrSimpleName
      return .packageModule pkg target
    else
-      let target := stringToLegalOrSimpleName target
+      let target := stringToLegalOrSimpleName target.copy
      return .packageTarget pkg target

 public def toString : (self : PartialBuildKey) → String
--- a/src/lake/Lake/CLI/Actions.lean
+++ b/src/lake/Lake/CLI/Actions.lean
@ -13,6 +13,7 @@ import Lake.Build.Targets
 import Lake.Build.Module
 import Lake.CLI.Build
 import Lake.Util.Proc
+import Init.Data.String.Search

 namespace Lake
 open Lean (Name)
@ -60,7 +61,7 @@ public def Package.resolveDriver
  if driver.isEmpty then
    error s!"{pkgName}: no {kind} driver configured"
  else
-    match driver.splitToList (· == '/') with
+    match driver.split '/' |>.toStringList with
    | [pkg, driver] =>
      let some pkg ← findPackage? pkg.toName
        | error s!"{pkgName}: unknown {kind} driver package '{pkg}'"
--- a/src/lake/Lake/CLI/Build.lean
+++ b/src/lake/Lake/CLI/Build.lean
@ -13,6 +13,7 @@ import Lake.Build.Infos
 import Lake.Build.Job.Monad
 public import Lake.Build.Job.Register
 import Lake.Util.IO
+import Init.Data.String.Search

 open System Lean

@ -179,26 +180,26 @@ def resolveTargetInWorkspace
 private def resolveTargetLikeSpec
  (ws : Workspace) (spec : String) (facet : Name) (isMaybePath explicit := false)
 : Except CliError (Array BuildSpec) := do
-  match spec.splitOn "/" with
+  match spec.split '/' |>.toList with
  | [spec] =>
    if spec.isEmpty then
      resolvePackageTarget ws ws.root facet
    else if explicit then
-      resolvePackageTarget ws (← parsePackageSpec ws spec) facet
+      resolvePackageTarget ws (← parsePackageSpec ws spec.copy) facet
    else
-      resolveTargetInWorkspace ws (stringToLegalOrSimpleName spec) facet
+      resolveTargetInWorkspace ws (stringToLegalOrSimpleName spec.copy) facet
  | [pkgSpec, targetSpec] =>
-    let pkg ← parsePackageSpec ws pkgSpec
+    let pkg ← parsePackageSpec ws pkgSpec.copy
    if targetSpec.isEmpty then
      resolvePackageTarget ws pkg facet
    else if targetSpec.startsWith "+" then
-      let mod := targetSpec.drop 1 |>.toName
+      let mod := targetSpec.drop 1 |>.copy.toName
      if let some mod := pkg.findTargetModule? mod then
        Array.singleton <$> resolveModuleTarget ws mod facet
      else
        throw <| CliError.unknownModule mod
    else
-      resolveTargetInPackage ws pkg (stringToLegalOrSimpleName targetSpec) facet
+      resolveTargetInPackage ws pkg (stringToLegalOrSimpleName targetSpec.copy) facet
  | _ =>
    if isMaybePath then
      throw <| CliError.unknownModulePath spec
@ -230,16 +231,16 @@ private def resolveTargetBaseSpec
 public def parseExeTargetSpec
  (ws : Workspace) (spec : String)
 : Except CliError LeanExe := do
-  match spec.splitOn "/" with
+  match spec.split '/' |>.toList with
  | [targetSpec] =>
-    let targetName := stringToLegalOrSimpleName targetSpec
+    let targetName := stringToLegalOrSimpleName targetSpec.copy
    match ws.findLeanExe? targetName with
    | some exe => return exe
    | none => throw <| CliError.unknownExe spec
  | [pkgSpec, targetSpec] =>
-    let pkgSpec := if pkgSpec.startsWith "@" then pkgSpec.drop 1 |>.copy else pkgSpec
-    let pkg ← parsePackageSpec ws pkgSpec
-    let targetName := stringToLegalOrSimpleName targetSpec
+    let pkgSpec := if pkgSpec.startsWith "@" then pkgSpec.drop 1 else pkgSpec
+    let pkg ← parsePackageSpec ws pkgSpec.copy
+    let targetName := stringToLegalOrSimpleName targetSpec.copy
    match pkg.findLeanExe? targetName with
    | some exe => return exe
    | none => throw <| CliError.unknownExe spec
@ -249,7 +250,7 @@ public def parseExeTargetSpec
 public def parseTargetSpec
  (ws : Workspace) (spec : String)
 : EIO CliError (Array BuildSpec) := do
-  match spec.splitOn ":" with
+  match spec.split ':' |>.toStringList with
  | [spec] =>
    resolveTargetBaseSpec ws spec .anonymous
  | [rootSpec, facet] =>
--- a/src/lake/Lake/CLI/Main.lean
+++ b/src/lake/Lake/CLI/Main.lean
@ -30,6 +30,7 @@ import Lake.CLI.Error
 import Lake.CLI.Actions
 import Lake.CLI.Translate
 import Lake.CLI.Serve
+import Init.Data.String.Search

 -- # CLI

@ -220,7 +221,7 @@ def lakeShortOption : (opt : Char) → CliM PUnit
 def validateRepo? (repo : String) : Option String := Id.run do
  unless repo.all isValidRepoChar do
    return "invalid characters in repository name"
-  match repo.splitToList (· == '/') with
+  match repo.split '/' |>.toStringList with
  | [owner, name] =>
    if owner.length > 39 then
      return "invalid repository name; owner must be at most 390 characters long"
@ -323,7 +324,7 @@ def verifyInstall (opts : LakeOptions) : ExceptT CliError MainM PUnit := do
  verifyLeanVersion leanInstall

 def parseScriptSpec (ws : Workspace) (spec : String) : Except CliError Script :=
-  match spec.splitOn "/" with
+  match spec.split '/' |>.toStringList with
  | [scriptName] =>
    match ws.findScript? (stringToLegalOrSimpleName scriptName) with
    | some script => return script
@ -352,7 +353,7 @@ def parseLangSpec (spec : String) : Except CliError ConfigLang :=
    throw <| CliError.unknownConfigLang spec

 def parseTemplateLangSpec (spec : String) : Except CliError (InitTemplate × ConfigLang) := do
-  match spec.splitOn "." with
+  match spec.split '.' |>.toStringList with
  | [tmp, lang] => return (← parseTemplateSpec tmp, ← parseLangSpec lang)
  | [tmp] => return (← parseTemplateSpec tmp, default)
  | _ => return default
--- a/src/lake/Lake/Config/Cache.lean
+++ b/src/lake/Lake/Config/Cache.lean
@ -14,6 +14,7 @@ import Lake.Util.Url
 import Lake.Util.Proc
 import Lake.Util.Reservoir
 import Lake.Util.IO
+import Init.Data.String.Search

 open Lean System

@ -369,8 +370,8 @@ toolchain and platform information in a manner similar to Reservoir.
 public def artifactContentType : String := "application/vnd.reservoir.artifact"

 private def appendScope (endpoint : String) (scope : String) : String :=
-  scope.splitToList (· == '/') |>.foldl (init := endpoint) fun s component =>
-    uriEncode component s |>.push '/'
+  scope.split '/' |>.fold (init := endpoint) fun s component =>
+    uriEncode component.copy s |>.push '/'

 private def s3ArtifactUrl (contentHash : Hash) (service : CacheService) (scope : String)  : String :=
  appendScope s!"{service.artifactEndpoint}/" scope ++ s!"{contentHash.hex}.art"
--- a/src/lake/Lake/Toml/Data/DateTime.lean
+++ b/src/lake/Lake/Toml/Data/DateTime.lean
@ -9,6 +9,7 @@ prelude
 public import Lake.Util.Date
 import Lake.Util.String
 import Init.Data.String.TakeDrop
+import Init.Data.String.Search

 /-!
 # TOML Date-Time
@ -63,12 +64,12 @@ public def ofValid? (hour minute second : Nat) : Option Time := do
  return {hour, minute, second}

 public def ofString? (t : String) : Option Time := do
-  match t.splitToList (· == ':') with
+  match t.split ':' |>.toList with
  | [h,m,s] =>
-    match s.splitToList (· == '.') with
+    match s.split '.' |>.toList with
    | [s,f] =>
      let time ← ofValid? (← h.toNat?) (← m.toNat?) (← s.toNat?)
-      return {time with fracExponent := f.length-1, fracMantissa := ← f.toNat?}
+      return {time with fracExponent := f.copy.length-1, fracMantissa := ← f.toNat?}
    | [s] =>
      ofValid? (← h.toNat?) (← m.toNat?) (← s.toNat?)
    | _ => none
@ -101,22 +102,22 @@ public instance : Coe Time DateTime := ⟨DateTime.localTime⟩
 namespace DateTime

 public def ofString? (dt : String) : Option DateTime := do
-  match dt.splitToList (fun c => c == 'T' || c == 't' || c == ' ') with
+  match dt.split (fun c => c == 'T' || c == 't' || c == ' ') |>.toList with
  | [d,t] =>
-    let d ← Date.ofString? d
+    let d ← Date.ofString? d.copy
    if t.back == 'Z' || t.back == 'z' then
      return offsetDateTime d (← Time.ofString? <| t.dropEnd 1 |>.copy)
-    else if let [t,o] := t.splitToList (· == '+') then
+    else if let [t,o] := t.split '+' |>.toStringList then
      return offsetDateTime d (← Time.ofString? t) <| some (false, ← Time.ofString? o)
-    else if let [t,o] := t.splitToList (· == '-') then
+    else if let [t,o] := t.split '-' |>.toStringList then
      return offsetDateTime d (← Time.ofString? t) <| some (true, ← Time.ofString? o)
    else
-      return localDateTime d (← Time.ofString? t)
+      return localDateTime d (← Time.ofString? t.copy)
  | [x] =>
-    if x.any (· == ':') then
-      return localTime (← Time.ofString? x)
+    if x.contains ':' then
+      return localTime (← Time.ofString? x.copy)
    else
-      return localDate (← Date.ofString? x)
+      return localDate (← Date.ofString? x.copy)
  | _ => none

 public protected def toString (dt : DateTime) : String :=
--- a/src/lake/Lake/Toml/Elab/Value.lean
+++ b/src/lake/Lake/Toml/Elab/Value.lean
@ -67,7 +67,7 @@ def decodeMantissa (s : String) : Nat × Nat :=
  (m, if e ≥ s.length then 0 else e)

 def decodeFrExp (s : String) : Nat × Int :=
-  match s.splitToList (fun c => c == 'E' || c == 'e') with
+  match s.split (fun c => c == 'E' || c == 'e') |>.toStringList with
  | [m, exp] =>
    let exp := decodeDecInt exp
    let (m, dotExp) := decodeMantissa m
--- a/src/lake/Lake/Util/Casing.lean
+++ b/src/lake/Lake/Util/Casing.lean
@ -8,6 +8,7 @@ module
 prelude
 public import Init.Data.String.Basic
 import Init.Data.String.Modify
+import Init.Data.String.Search

 namespace Lake

@ -15,8 +16,8 @@ open Lean (Name)

 /-- Converts a snake case, kebab case, or lower camel case `String` to upper camel case. -/
 public def toUpperCamelCaseString (str : String) : String :=
-  let parts := str.splitToList fun chr => chr == '_' || chr == '-'
-  String.join <| parts.map (·.capitalize)
+  let parts := str.split fun chr => chr == '_' || chr == '-'
+  String.join <| parts.map (·.copy.capitalize) |>.toList

 /-- Converts a snake case, kebab case, or lower camel case `Name` to upper camel case. -/
 public def toUpperCamelCase (name : Name) : Name :=
--- a/src/lake/Lake/Util/Date.lean
+++ b/src/lake/Lake/Util/Date.lean
@ -8,7 +8,7 @@ module
 prelude
 public import Init.Data.Ord.Basic
 import Lake.Util.String
-import Init.Data.String.Basic
+import Init.Data.String.Search

 /-!
 #  Date
@ -55,7 +55,7 @@ public def ofValid? (year month day : Nat) : Option Date := do
  return {year, month, day}

 public def ofString? (t : String) : Option Date := do
-  match t.splitToList (· == '-') with
+  match t.split '-' |>.toList with
  | [y,m,d] =>
    ofValid? (← y.toNat?) (← m.toNat?) (← d.toNat?)
  | _ => none
--- a/src/lake/Lake/Util/Git.lean
+++ b/src/lake/Lake/Util/Git.lean
@ -9,6 +9,7 @@ prelude
 public import Init.Data.ToString
 public import Lake.Util.Proc
 import Init.Data.String.TakeDrop
+import Init.Data.String.Search

 open System
 namespace Lake
@ -102,7 +103,7 @@ public def getHeadRevisions (repo : GitRepo) (n : Nat := 0) : LogIO (Array Strin
  let args := #["rev-list", "HEAD"]
  let args := if n != 0 then args ++ #["-n", toString n] else args
  let revs ← repo.captureGit args
-  return revs.splitToList (· == '\n') |>.toArray
+  return revs.split '\n' |>.toStringArray

 public def resolveRemoteRevision (rev : String) (remote := Git.defaultRemote) (repo : GitRepo) : LogIO String := do
  if Git.isFullObjectName rev then return rev
@ -121,7 +122,7 @@ public def revisionExists (rev : String) (repo : GitRepo) : BaseIO Bool := do

 public def getTags (repo : GitRepo) : BaseIO (List String) := do
  let some out ← repo.captureGit? #["tag"] | return []
-  return out.splitToList (· == '\n')
+  return out.split '\n' |>.toStringList

 public def findTag? (rev : String := "HEAD") (repo : GitRepo) : BaseIO (Option String) := do
  repo.captureGit? #["describe", "--tags", "--exact-match", rev]
--- a/tests/lean/run/diff.lean
+++ b/tests/lean/run/diff.lean
@ -86,12 +86,12 @@ These tests find least common subsequences.

 /-- info: #["A", "A"] -/
 #guard_msgs in
-#eval lcs ("A,A,B".split (·==',') |>.toArray).toSubarray ("A,A,X".split (·==',') |>.toArray).toSubarray
+#eval lcs ("A,A,B".split (·==',') |>.toStringArray).toSubarray ("A,A,X".split (·==',') |>.toStringArray).toSubarray

 /-- info: #["A", "D", "E", "F"] -/
 #guard_msgs in
-#eval lcs ("A,C,D,E,F,G".split (·==',') |>.toArray).toSubarray ("A,Y,Z,D,E,F".split (·==',') |>.toArray).toSubarray
+#eval lcs ("A,C,D,E,F,G".split (·==',') |>.toStringArray).toSubarray ("A,Y,Z,D,E,F".split (·==',') |>.toStringArray).toSubarray

 /-- info: #["A", "A", "D", "E", "F"] -/
 #guard_msgs in
-#eval lcs ("A,A,B,C,D,E,F,G".split (·==',') |>.toArray).toSubarray ("A,A,X,Y,Z,D,E,F".split (·==',') |>.toArray).toSubarray
+#eval lcs ("A,A,B,C,D,E,F,G".split (·==',') |>.toStringArray).toSubarray ("A,A,X,Y,Z,D,E,F".split (·==',') |>.toStringArray).toSubarray
--- a/tests/lean/run/string_gaps.lean
+++ b/tests/lean/run/string_gaps.lean
@ -143,15 +143,15 @@ for indented multiline string literals.
 -/

 def String.dedent (s : String) : Option String :=
-  let parts := s.split (· == '\n') |>.map String.trimLeft
+  let parts := s.split '\n' |>.map String.Slice.trimAsciiStart |>.toList
  match parts with
  | [] => ""
-  | [p] => p
+  | [p] => p.copy
  | p₀ :: parts =>
    if !parts.all (·.startsWith "|") then
      none
    else
-      p₀ ++ "\n" ++ String.intercalate "\n" (parts.map fun p => p.drop 1 |>.copy)
+      p₀.copy ++ "\n" ++ "\n".toSlice.intercalate (parts.map fun p => p.drop 1)

 elab "d!" s:str : term => do
  let some s := s.raw.isStrLit? | Lean.Elab.throwIllFormedSyntax