feat: upstream List.splitOn(P) (#12702)

This PR upstreams `List.splitOn` and `List.splitOnP` from
Batteries/mathlib.

The function `splitOnP.go` is factored out to `splitOnPPrepend`, because
it is useful to state induction hypotheses in terms of
`splitOnPPrepend`.
This commit is contained in:
Markus Himmel 2026-02-26 14:45:34 +01:00 committed by GitHub
parent 846420daba
commit 1e4894b431
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 274 additions and 1 deletions

View file

@ -37,3 +37,4 @@ public import Init.Data.List.Lex
public import Init.Data.List.Range
public import Init.Data.List.Scan
public import Init.Data.List.ControlImpl
public import Init.Data.List.SplitOn

View file

@ -2164,7 +2164,9 @@ def intersperse (sep : α) : (l : List α) → List α
| x::xs => x :: sep :: intersperse sep xs
@[simp] theorem intersperse_nil {sep : α} : ([] : List α).intersperse sep = [] := rfl
@[simp] theorem intersperse_single {x : α} {sep : α} : [x].intersperse sep = [x] := rfl
@[simp] theorem intersperse_singleton {x : α} {sep : α} : [x].intersperse sep = [x] := rfl
@[deprecated intersperse_single (since := "2026-02-26")]
theorem intersperse_single {x : α} {sep : α} : [x].intersperse sep = [x] := rfl
@[simp] theorem intersperse_cons₂ {x : α} {y : α} {zs : List α} {sep : α} :
(x::y::zs).intersperse sep = x::sep::((y::zs).intersperse sep) := rfl

View file

@ -3774,4 +3774,28 @@ theorem get_mem : ∀ (l : List α) n, get l n ∈ l
theorem mem_iff_get {a} {l : List α} : a ∈ l ↔ ∃ n, get l n = a :=
⟨get_of_mem, fun ⟨_, e⟩ => e ▸ get_mem ..⟩
/-! ### `intercalate` -/
@[simp]
theorem intercalate_nil {ys : List α} : ys.intercalate [] = [] := rfl
@[simp]
theorem intercalate_singleton {ys xs : List α} : ys.intercalate [xs] = xs := by
simp [intercalate]
@[simp]
theorem intercalate_cons_cons {ys l l' : List α} {zs : List (List α)} :
ys.intercalate (l :: l' :: zs) = l ++ ys ++ ys.intercalate (l' :: zs) := by
simp [intercalate]
@[simp]
theorem intercalate_cons_cons_left {ys l : List α} {x : α} {zs : List (List α)} :
ys.intercalate ((x :: l) :: zs) = x :: ys.intercalate (l :: zs) := by
cases zs <;> simp
theorem intercalate_cons_of_ne_nil {ys l : List α} {zs : List (List α)} (h : zs ≠ []) :
ys.intercalate (l :: zs) = l ++ ys ++ ys.intercalate zs :=
match zs, h with
| l'::zs, _ => by simp
end List

View file

@ -0,0 +1,10 @@
/-
Copyright (c) 2026 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Markus Himmel
-/
module
prelude
public import Init.Data.List.SplitOn.Basic
public import Init.Data.List.SplitOn.Lemmas

View file

@ -0,0 +1,70 @@
/-
Copyright (c) 2016 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura
-/
module
prelude
public import Init.Data.List.Basic
public import Init.NotationExtra
import Init.Data.Array.Bootstrap
import Init.Data.List.Lemmas
public section
set_option doc.verso true
namespace List
/--
Split a list at every element satisfying a predicate, and then prepend {lean}`acc.reverse` to the
first element of the result.
* {lean}`[1, 1, 2, 3, 2, 4, 4].splitOnPPrepend (· == 2) [0, 5] = [[5, 0, 1, 1], [3], [4, 4]]`
-/
noncomputable def splitOnPPrepend (p : α → Bool) : (l : List α) → (acc : List α) → List (List α)
| [], acc => [acc.reverse]
| a :: t, acc => if p a then acc.reverse :: splitOnPPrepend p t [] else splitOnPPrepend p t (a::acc)
/--
Split a list at every element satisfying a predicate. The separators are not in the result.
Examples:
* {lean}`[1, 1, 2, 3, 2, 4, 4].splitOnP (· == 2) = [[1, 1], [3], [4, 4]]`
-/
noncomputable def splitOnP (p : α → Bool) (l : List α) : List (List α) :=
splitOnPPrepend p l []
@[deprecated splitOnPPrepend (since := "2026-02-26")]
noncomputable def splitOnP.go (p : α → Bool) (l acc : List α) : List (List α) :=
splitOnPPrepend p l acc
/-- Tail recursive version of {name}`splitOnP`. -/
@[inline]
def splitOnPTR (p : α → Bool) (l : List α) : List (List α) := go l #[] #[] where
@[specialize] go : List α → Array α → Array (List α) → List (List α)
| [], acc, r => r.toListAppend [acc.toList]
| a :: t, acc, r => bif p a then go t #[] (r.push acc.toList) else go t (acc.push a) r
@[csimp] theorem splitOnP_eq_splitOnPTR : @splitOnP = @splitOnPTR := by
funext α P l
simp only [splitOnPTR]
suffices ∀ xs acc r,
splitOnPTR.go P xs acc r = r.toList ++ splitOnPPrepend P xs acc.toList.reverse from
(this l #[] #[]).symm
intro xs acc r
induction xs generalizing acc r with
| nil => simp [splitOnPPrepend, splitOnPTR.go]
| cons x xs IH => cases h : P x <;> simp [splitOnPPrepend, splitOnPTR.go, *]
/--
Split a list at every occurrence of a separator element. The separators are not in the result.
Examples:
* {lean}`[1, 1, 2, 3, 2, 4, 4].splitOn 2 = [[1, 1], [3], [4, 4]]`
-/
@[inline] def splitOn [BEq α] (a : α) (as : List α) : List (List α) :=
as.splitOnP (· == a)
end List

View file

@ -0,0 +1,166 @@
/-
Copyright (c) 2014 Parikshit Khanna. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Parikshit Khanna, Jeremy Avigad, Leonardo de Moura, Floris van Doorn, Mario Carneiro, Markus Himmel
-/
module
prelude
public import Init.Data.List.SplitOn.Basic
import all Init.Data.List.SplitOn.Basic
import Init.Data.List.Nat.Modify
import Init.ByCases
public section
namespace List
variable {p : α → Bool} {xs : List α} {ls : List (List α)}
@[simp]
theorem splitOn_nil [BEq α] (a : α) : [].splitOn a = [[]] :=
(rfl)
@[simp]
theorem splitOnP_nil : [].splitOnP p = [[]] :=
(rfl)
@[simp]
theorem splitOnPPrepend_ne_nil (p : α → Bool) (xs acc : List α) : splitOnPPrepend p xs acc ≠ [] := by
fun_induction splitOnPPrepend <;> simp_all
@[deprecated splitOnPPrepend_ne_nil (since := "2026-02-26")]
theorem splitOnP.go_ne_nil (p : α → Bool) (xs acc : List α) : splitOnPPrepend p xs acc ≠ [] :=
splitOnPPrepend_ne_nil p xs acc
@[simp] theorem splitOnPPrepend_nil {acc : List α} : splitOnPPrepend p [] acc = [acc.reverse] := (rfl)
@[simp] theorem splitOnPPrepend_nil_right : splitOnPPrepend p xs [] = splitOnP p xs := (rfl)
theorem splitOnP_eq_splitOnPPrepend : splitOnP p xs = splitOnPPrepend p xs [] := (rfl)
theorem splitOnPPrepend_cons_eq_if {x : α} {xs acc : List α} :
splitOnPPrepend p (x :: xs) acc =
if p x then acc.reverse :: splitOnP p xs else splitOnPPrepend p xs (x :: acc) := by
simp [splitOnPPrepend]
theorem splitOnPPrepend_cons_pos {p : α → Bool} {a : α} {l acc : List α} (h : p a) :
splitOnPPrepend p (a :: l) acc = acc.reverse :: splitOnP p l := by
simp [splitOnPPrepend, h]
theorem splitOnPPrepend_cons_neg {p : α → Bool} {a : α} {l acc : List α} (h : p a = false) :
splitOnPPrepend p (a :: l) acc = splitOnPPrepend p l (a :: acc) := by
simp [splitOnPPrepend, h]
theorem splitOnP_cons_eq_if_splitOnPPrepend {x : α} {xs : List α} :
splitOnP p (x :: xs) = if p x then [] :: splitOnP p xs else splitOnPPrepend p xs [x] := by
simp [splitOnPPrepend_cons_eq_if, ← splitOnPPrepend_nil_right]
theorem splitOnPPrepend_eq_modifyHead {xs acc : List α} :
splitOnPPrepend p xs acc = modifyHead (acc.reverse ++ ·) (splitOnP p xs) := by
induction xs generalizing acc with
| nil => simp
| cons hd tl ih =>
simp [splitOnPPrepend_cons_eq_if, splitOnP_cons_eq_if_splitOnPPrepend, ih]
split <;> simp <;> congr
@[deprecated splitOnPPrepend_eq_modifyHead (since := "2026-02-26")]
theorem splitOnP.go_acc {xs acc : List α} :
splitOnPPrepend p xs acc = modifyHead (acc.reverse ++ ·) (splitOnP p xs) :=
splitOnPPrepend_eq_modifyHead
theorem splitOnP_ne_nil (p : α → Bool) (xs : List α) : xs.splitOnP p ≠ [] :=
splitOnPPrepend_ne_nil p xs []
theorem splitOnP_cons_eq_if_modifyHead (x : α) (xs : List α) :
(x :: xs).splitOnP p =
if p x then [] :: xs.splitOnP p else (xs.splitOnP p).modifyHead (cons x) := by
simp [splitOnP_cons_eq_if_splitOnPPrepend, splitOnPPrepend_eq_modifyHead]
@[deprecated splitOnP_cons_eq_if_modifyHead (since := "2026-02-26")]
theorem splitOnP_cons (x : α) (xs : List α) :
(x :: xs).splitOnP p =
if p x then [] :: xs.splitOnP p else (xs.splitOnP p).modifyHead (cons x) :=
splitOnP_cons_eq_if_modifyHead x xs
/-- The original list `L` can be recovered by flattening the lists produced by `splitOnP p L`,
interspersed with the elements `L.filter p`. -/
theorem splitOnP_spec (as : List α) :
flatten (zipWith (· ++ ·) (splitOnP p as) (((as.filter p).map fun x => [x]) ++ [[]])) = as := by
induction as with
| nil => simp
| cons a as' ih =>
rw [splitOnP_cons_eq_if_modifyHead]
split <;> simp [*, flatten_zipWith, splitOnP_ne_nil]
where
flatten_zipWith {xs ys : List (List α)} {a : α} (hxs : xs ≠ []) (hys : ys ≠ []) :
flatten (zipWith (fun x x_1 => x ++ x_1) (modifyHead (cons a) xs) ys) =
a :: flatten (zipWith (fun x x_1 => x ++ x_1) xs ys) := by
cases xs <;> cases ys <;> simp_all
/-- If no element satisfies `p` in the list `xs`, then `xs.splitOnP p = [xs]` -/
theorem splitOnP_eq_singleton (h : ∀ x ∈ xs, p x = false) : xs.splitOnP p = [xs] := by
induction xs with
| nil => simp
| cons hd tl ih =>
simp only [mem_cons, forall_eq_or_imp] at h
simp [splitOnP_cons_eq_if_modifyHead, h.1, ih h.2]
@[deprecated splitOnP_eq_singleton (since := "2026-02-26")]
theorem splitOnP_eq_single (h : ∀ x ∈ xs, p x = false) : xs.splitOnP p = [xs] :=
splitOnP_eq_singleton h
/-- When a list of the form `[...xs, sep, ...as]` is split at the `sep` element satisfying `p`,
the result is the concatenation of `splitOnP` called on `xs` and `as` -/
theorem splitOnP_append_cons (xs as : List α) {sep : α} (hsep : p sep) :
(xs ++ sep :: as).splitOnP p = List.splitOnP p xs ++ List.splitOnP p as := by
induction xs with
| nil => simp [splitOnP_cons_eq_if_modifyHead, hsep]
| cons hd tl ih =>
obtain ⟨hd1, tl1, h1'⟩ := List.exists_cons_of_ne_nil (List.splitOnP_ne_nil (p := p) (xs := tl))
by_cases hPh : p hd <;> simp [splitOnP_cons_eq_if_modifyHead, *]
/-- When a list of the form `[...xs, sep, ...as]` is split on `p`, the first element is `xs`,
assuming no element in `xs` satisfies `p` but `sep` does satisfy `p` -/
theorem splitOnP_append_cons_of_forall_mem (h : ∀ x ∈ xs, p x = false) (sep : α)
(hsep : p sep = true) (as : List α) : (xs ++ sep :: as).splitOnP p = xs :: as.splitOnP p := by
rw [splitOnP_append_cons xs as hsep, splitOnP_eq_singleton h, singleton_append]
@[deprecated splitOnP_append_cons_of_forall_mem (since := "2026-02-26")]
theorem splitOnP_first (h : ∀ x ∈ xs, p x = false) (sep : α)
(hsep : p sep = true) (as : List α) : (xs ++ sep :: as).splitOnP p = xs :: as.splitOnP p :=
splitOnP_append_cons_of_forall_mem h sep hsep as
@[simp]
theorem splitOn_eq_splitOnP [BEq α] {x : α} {xs : List α} : xs.splitOn x = xs.splitOnP (· == x) :=
(rfl)
/-- `intercalate [x]` is the left inverse of `splitOn x` -/
@[simp]
theorem intercalate_splitOn [BEq α] [LawfulBEq α] (x : α) : [x].intercalate (xs.splitOn x) = xs := by
simp only [splitOn_eq_splitOnP]
induction xs with
| nil => simp
| cons hd tl ih =>
simp only [splitOnP_cons_eq_if_modifyHead, beq_iff_eq]
split
· simp_all [intercalate_cons_of_ne_nil, splitOnP_ne_nil]
· have hsp := splitOnP_ne_nil (· == x) tl
generalize splitOnP (· == x) tl = ls at *
cases ls <;> simp_all
/-- `splitOn x` is the left inverse of `intercalate [x]`, on the domain
consisting of each nonempty list of lists `ls` whose elements do not contain `x` -/
theorem splitOn_intercalate [BEq α] [LawfulBEq α] (x : α) (hx : ∀ l ∈ ls, x ∉ l) (hls : ls ≠ []) :
([x].intercalate ls).splitOn x = ls := by
simp only [splitOn_eq_splitOnP]
induction ls with
| nil => simp at hls
| cons hd tl ih =>
simp only [mem_cons, forall_eq_or_imp] at ⊢ hx
have (y) (hy : y ∈ hd) : (y == x) = false := beq_eq_false_iff_ne.2 fun h => absurd hy (h ▸ hx.1)
match tl with
| [] => simpa using splitOnP_eq_singleton this
| t::tl =>
simp only [intercalate_cons_cons, append_assoc, cons_append, nil_append]
rw [splitOnP_append_cons_of_forall_mem this _ (by simp), ih hx.2 (by simp)]
end List