feat: upstream List.splitOn(P) (#12702)

This PR upstreams `List.splitOn` and `List.splitOnP` from Batteries/mathlib. The function `splitOnP.go` is factored out to `splitOnPPrepend`, because it is useful to state induction hypotheses in terms of `splitOnPPrepend`.
2026-02-26 14:45:34 +01:00 · 2026-02-26 14:45:34 +01:00 · 1e4894b431
commit 1e4894b431
parent 846420daba
6 changed files with 274 additions and 1 deletions
--- a/src/Init/Data/List.lean
+++ b/src/Init/Data/List.lean
@ -37,3 +37,4 @@ public import Init.Data.List.Lex
 public import Init.Data.List.Range
 public import Init.Data.List.Scan
 public import Init.Data.List.ControlImpl
+public import Init.Data.List.SplitOn
--- a/src/Init/Data/List/Basic.lean
+++ b/src/Init/Data/List/Basic.lean
@ -2164,7 +2164,9 @@ def intersperse (sep : α) : (l : List α) → List α
  | x::xs => x :: sep :: intersperse sep xs

@[simp] theorem intersperse_nil {sep : α} : ([] : List α).intersperse sep = [] := rfl
-@[simp] theorem intersperse_single {x : α} {sep : α} : [x].intersperse sep = [x] := rfl
+@[simp] theorem intersperse_singleton {x : α} {sep : α} : [x].intersperse sep = [x] := rfl
+@[deprecated intersperse_single (since := "2026-02-26")]
+theorem intersperse_single {x : α} {sep : α} : [x].intersperse sep = [x] := rfl
@[simp] theorem intersperse_cons₂ {x : α} {y : α} {zs : List α} {sep : α} :
    (x::y::zs).intersperse sep = x::sep::((y::zs).intersperse sep) := rfl

--- a/src/Init/Data/List/Lemmas.lean
+++ b/src/Init/Data/List/Lemmas.lean
@ -3774,4 +3774,28 @@ theorem get_mem : ∀ (l : List α) n, get l n ∈ l
 theorem mem_iff_get {a} {l : List α} : a ∈ l ↔ ∃ n, get l n = a :=
  ⟨get_of_mem, fun ⟨_, e⟩ => e ▸ get_mem ..⟩

+/-! ### `intercalate` -/
+
+@[simp]
+theorem intercalate_nil {ys : List α} : ys.intercalate [] = [] := rfl
+
+@[simp]
+theorem intercalate_singleton {ys xs : List α} : ys.intercalate [xs] = xs := by
+  simp [intercalate]
+
+@[simp]
+theorem intercalate_cons_cons {ys l l' : List α} {zs : List (List α)} :
+    ys.intercalate (l :: l' :: zs) = l ++ ys ++ ys.intercalate (l' :: zs) := by
+  simp [intercalate]
+
+@[simp]
+theorem intercalate_cons_cons_left {ys l : List α} {x : α} {zs : List (List α)} :
+    ys.intercalate ((x :: l) :: zs) = x :: ys.intercalate (l :: zs) := by
+  cases zs <;> simp
+
+theorem intercalate_cons_of_ne_nil {ys l : List α} {zs : List (List α)} (h : zs ≠ []) :
+    ys.intercalate (l :: zs) = l ++ ys ++ ys.intercalate zs :=
+  match zs, h with
+  | l'::zs, _ => by simp
+
 end List
--- a/src/Init/Data/List/SplitOn.lean
+++ b/src/Init/Data/List/SplitOn.lean
@ -0,0 +1,10 @@
+/-
+Copyright (c) 2026 Lean FRO, LLC. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Markus Himmel
+-/
+module
+
+prelude
+public import Init.Data.List.SplitOn.Basic
+public import Init.Data.List.SplitOn.Lemmas
--- a/src/Init/Data/List/SplitOn/Basic.lean
+++ b/src/Init/Data/List/SplitOn/Basic.lean
@ -0,0 +1,70 @@
+/-
+Copyright (c) 2016 Microsoft Corporation. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Leonardo de Moura
+-/
+module
+
+prelude
+public import Init.Data.List.Basic
+public import Init.NotationExtra
+import Init.Data.Array.Bootstrap
+import Init.Data.List.Lemmas
+
+public section
+
+set_option doc.verso true
+
+namespace List
+
+/--
+Split a list at every element satisfying a predicate, and then prepend {lean}`acc.reverse` to the
+first element of the result.
+
+* {lean}`[1, 1, 2, 3, 2, 4, 4].splitOnPPrepend (· == 2) [0, 5] = [[5, 0, 1, 1], [3], [4, 4]]`
+-/
+noncomputable def splitOnPPrepend (p : α → Bool) : (l : List α) → (acc : List α) → List (List α)
+| [], acc => [acc.reverse]
+| a :: t, acc => if p a then acc.reverse :: splitOnPPrepend p t [] else splitOnPPrepend p t (a::acc)
+
+/--
+Split a list at every element satisfying a predicate. The separators are not in the result.
+
+Examples:
+* {lean}`[1, 1, 2, 3, 2, 4, 4].splitOnP (· == 2) = [[1, 1], [3], [4, 4]]`
+-/
+noncomputable def splitOnP (p : α → Bool) (l : List α) : List (List α) :=
+  splitOnPPrepend p l []
+
+@[deprecated splitOnPPrepend (since := "2026-02-26")]
+noncomputable def splitOnP.go (p : α → Bool) (l acc : List α) : List (List α) :=
+  splitOnPPrepend p l acc
+
+/-- Tail recursive version of {name}`splitOnP`. -/
+@[inline]
+def splitOnPTR (p : α → Bool) (l : List α) : List (List α) := go l #[] #[] where
+  @[specialize] go : List α → Array α → Array (List α) → List (List α)
+  | [], acc, r => r.toListAppend [acc.toList]
+  | a :: t, acc, r => bif p a then go t #[] (r.push acc.toList) else go t (acc.push a) r
+
+@[csimp] theorem splitOnP_eq_splitOnPTR : @splitOnP = @splitOnPTR := by
+  funext α P l
+  simp only [splitOnPTR]
+  suffices ∀ xs acc r,
+    splitOnPTR.go P xs acc r = r.toList ++ splitOnPPrepend P xs acc.toList.reverse from
+      (this l #[] #[]).symm
+  intro xs acc r
+  induction xs generalizing acc r with
+  | nil => simp [splitOnPPrepend, splitOnPTR.go]
+  | cons x xs IH => cases h : P x <;> simp [splitOnPPrepend, splitOnPTR.go, *]
+
+/--
+Split a list at every occurrence of a separator element. The separators are not in the result.
+
+Examples:
+* {lean}`[1, 1, 2, 3, 2, 4, 4].splitOn 2 = [[1, 1], [3], [4, 4]]`
+-/
+@[inline] def splitOn [BEq α] (a : α) (as : List α) : List (List α) :=
+  as.splitOnP (· == a)
+
+end List
--- a/src/Init/Data/List/SplitOn/Lemmas.lean
+++ b/src/Init/Data/List/SplitOn/Lemmas.lean
@ -0,0 +1,166 @@
+/-
+Copyright (c) 2014 Parikshit Khanna. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Parikshit Khanna, Jeremy Avigad, Leonardo de Moura, Floris van Doorn, Mario Carneiro, Markus Himmel
+-/
+module
+
+prelude
+public import Init.Data.List.SplitOn.Basic
+import all Init.Data.List.SplitOn.Basic
+import Init.Data.List.Nat.Modify
+import Init.ByCases
+
+public section
+
+namespace List
+
+variable {p : α → Bool} {xs : List α} {ls : List (List α)}
+
+@[simp]
+theorem splitOn_nil [BEq α] (a : α) : [].splitOn a = [[]] :=
+  (rfl)
+
+@[simp]
+theorem splitOnP_nil : [].splitOnP p = [[]] :=
+  (rfl)
+
+@[simp]
+theorem splitOnPPrepend_ne_nil (p : α → Bool) (xs acc : List α) : splitOnPPrepend p xs acc ≠ [] := by
+  fun_induction splitOnPPrepend <;> simp_all
+
+@[deprecated splitOnPPrepend_ne_nil (since := "2026-02-26")]
+theorem splitOnP.go_ne_nil (p : α → Bool) (xs acc : List α) : splitOnPPrepend p xs acc ≠ [] :=
+  splitOnPPrepend_ne_nil p xs acc
+
+@[simp] theorem splitOnPPrepend_nil {acc : List α} : splitOnPPrepend p [] acc = [acc.reverse] := (rfl)
+@[simp] theorem splitOnPPrepend_nil_right : splitOnPPrepend p xs [] = splitOnP p xs := (rfl)
+theorem splitOnP_eq_splitOnPPrepend : splitOnP p xs = splitOnPPrepend p xs [] := (rfl)
+
+theorem splitOnPPrepend_cons_eq_if {x : α} {xs acc : List α} :
+    splitOnPPrepend p (x :: xs) acc =
+      if p x then acc.reverse :: splitOnP p xs else splitOnPPrepend p xs (x :: acc) := by
+  simp [splitOnPPrepend]
+
+theorem splitOnPPrepend_cons_pos {p : α → Bool} {a : α} {l acc : List α} (h : p a) :
+    splitOnPPrepend p (a :: l) acc = acc.reverse :: splitOnP p l := by
+  simp [splitOnPPrepend, h]
+
+theorem splitOnPPrepend_cons_neg {p : α → Bool} {a : α} {l acc : List α} (h : p a = false) :
+    splitOnPPrepend p (a :: l) acc = splitOnPPrepend p l (a :: acc) := by
+  simp [splitOnPPrepend, h]
+
+theorem splitOnP_cons_eq_if_splitOnPPrepend {x : α} {xs : List α} :
+    splitOnP p (x :: xs) = if p x then [] :: splitOnP p xs else splitOnPPrepend p xs [x] := by
+  simp [splitOnPPrepend_cons_eq_if, ← splitOnPPrepend_nil_right]
+
+theorem splitOnPPrepend_eq_modifyHead {xs acc : List α} :
+    splitOnPPrepend p xs acc = modifyHead (acc.reverse ++ ·) (splitOnP p xs) := by
+  induction xs generalizing acc with
+  | nil => simp
+  | cons hd tl ih =>
+    simp [splitOnPPrepend_cons_eq_if, splitOnP_cons_eq_if_splitOnPPrepend, ih]
+    split <;> simp <;> congr
+
+@[deprecated splitOnPPrepend_eq_modifyHead (since := "2026-02-26")]
+theorem splitOnP.go_acc {xs acc : List α} :
+    splitOnPPrepend p xs acc = modifyHead (acc.reverse ++ ·) (splitOnP p xs) :=
+  splitOnPPrepend_eq_modifyHead
+
+theorem splitOnP_ne_nil (p : α → Bool) (xs : List α) : xs.splitOnP p ≠ [] :=
+  splitOnPPrepend_ne_nil p xs []
+
+theorem splitOnP_cons_eq_if_modifyHead (x : α) (xs : List α) :
+    (x :: xs).splitOnP p =
+      if p x then [] :: xs.splitOnP p else (xs.splitOnP p).modifyHead (cons x) := by
+  simp [splitOnP_cons_eq_if_splitOnPPrepend, splitOnPPrepend_eq_modifyHead]
+
+@[deprecated splitOnP_cons_eq_if_modifyHead (since := "2026-02-26")]
+theorem splitOnP_cons (x : α) (xs : List α) :
+    (x :: xs).splitOnP p =
+      if p x then [] :: xs.splitOnP p else (xs.splitOnP p).modifyHead (cons x) :=
+  splitOnP_cons_eq_if_modifyHead x xs
+
+/-- The original list `L` can be recovered by flattening the lists produced by `splitOnP p L`,
+interspersed with the elements `L.filter p`. -/
+theorem splitOnP_spec (as : List α) :
+    flatten (zipWith (· ++ ·) (splitOnP p as) (((as.filter p).map fun x => [x]) ++ [[]])) = as := by
+  induction as with
+  | nil => simp
+  | cons a as' ih =>
+    rw [splitOnP_cons_eq_if_modifyHead]
+    split <;> simp [*, flatten_zipWith, splitOnP_ne_nil]
+where
+  flatten_zipWith {xs ys : List (List α)} {a : α} (hxs : xs ≠ []) (hys : ys ≠ []) :
+      flatten (zipWith (fun x x_1 => x ++ x_1) (modifyHead (cons a) xs) ys) =
+        a :: flatten (zipWith (fun x x_1 => x ++ x_1) xs ys) := by
+    cases xs <;> cases ys <;> simp_all
+
+/-- If no element satisfies `p` in the list `xs`, then `xs.splitOnP p = [xs]` -/
+theorem splitOnP_eq_singleton (h : ∀ x ∈ xs, p x = false) : xs.splitOnP p = [xs] := by
+  induction xs with
+  | nil => simp
+  | cons hd tl ih =>
+    simp only [mem_cons, forall_eq_or_imp] at h
+    simp [splitOnP_cons_eq_if_modifyHead, h.1, ih h.2]
+
+@[deprecated splitOnP_eq_singleton (since := "2026-02-26")]
+theorem splitOnP_eq_single (h : ∀ x ∈ xs, p x = false) : xs.splitOnP p = [xs] :=
+  splitOnP_eq_singleton h
+
+/-- When a list of the form `[...xs, sep, ...as]` is split at the `sep` element satisfying `p`,
+the result is the concatenation of `splitOnP` called on `xs` and `as` -/
+theorem splitOnP_append_cons (xs as : List α) {sep : α} (hsep : p sep) :
+    (xs ++ sep :: as).splitOnP p = List.splitOnP p xs ++ List.splitOnP p as := by
+  induction xs with
+  | nil => simp [splitOnP_cons_eq_if_modifyHead, hsep]
+  | cons hd tl ih =>
+    obtain ⟨hd1, tl1, h1'⟩ := List.exists_cons_of_ne_nil (List.splitOnP_ne_nil (p := p) (xs := tl))
+    by_cases hPh : p hd <;> simp [splitOnP_cons_eq_if_modifyHead, *]
+
+/-- When a list of the form `[...xs, sep, ...as]` is split on `p`, the first element is `xs`,
+  assuming no element in `xs` satisfies `p` but `sep` does satisfy `p` -/
+theorem splitOnP_append_cons_of_forall_mem (h : ∀ x ∈ xs, p x = false) (sep : α)
+    (hsep : p sep = true) (as : List α) : (xs ++ sep :: as).splitOnP p = xs :: as.splitOnP p := by
+  rw [splitOnP_append_cons xs as hsep, splitOnP_eq_singleton h, singleton_append]
+
+@[deprecated splitOnP_append_cons_of_forall_mem (since := "2026-02-26")]
+theorem splitOnP_first (h : ∀ x ∈ xs, p x = false) (sep : α)
+    (hsep : p sep = true) (as : List α) : (xs ++ sep :: as).splitOnP p = xs :: as.splitOnP p :=
+  splitOnP_append_cons_of_forall_mem h sep hsep as
+
+@[simp]
+theorem splitOn_eq_splitOnP [BEq α] {x : α} {xs : List α} : xs.splitOn x = xs.splitOnP (· == x) :=
+  (rfl)
+
+/-- `intercalate [x]` is the left inverse of `splitOn x` -/
+@[simp]
+theorem intercalate_splitOn [BEq α] [LawfulBEq α] (x : α) : [x].intercalate (xs.splitOn x) = xs := by
+  simp only [splitOn_eq_splitOnP]
+  induction xs with
+  | nil => simp
+  | cons hd tl ih =>
+    simp only [splitOnP_cons_eq_if_modifyHead, beq_iff_eq]
+    split
+    · simp_all [intercalate_cons_of_ne_nil, splitOnP_ne_nil]
+    · have hsp := splitOnP_ne_nil (· == x) tl
+      generalize splitOnP (· == x) tl = ls at *
+      cases ls <;> simp_all
+
+/-- `splitOn x` is the left inverse of `intercalate [x]`, on the domain
+consisting of each nonempty list of lists `ls` whose elements do not contain `x` -/
+theorem splitOn_intercalate [BEq α] [LawfulBEq α] (x : α) (hx : ∀ l ∈ ls, x ∉ l) (hls : ls ≠ []) :
+    ([x].intercalate ls).splitOn x = ls := by
+  simp only [splitOn_eq_splitOnP]
+  induction ls with
+  | nil => simp at hls
+  | cons hd tl ih =>
+    simp only [mem_cons, forall_eq_or_imp] at ⊢ hx
+    have (y) (hy : y ∈ hd) : (y == x) = false := beq_eq_false_iff_ne.2 fun h => absurd hy (h ▸ hx.1)
+    match tl with
+    | [] => simpa using splitOnP_eq_singleton this
+    | t::tl =>
+      simp only [intercalate_cons_cons, append_assoc, cons_append, nil_append]
+      rw [splitOnP_append_cons_of_forall_mem this _ (by simp), ih hx.2 (by simp)]
+
+end List