lean4-htt/src/Std/Data/TreeSet/Basic.lean
Paul Reichert 0f1133fe69
feat: tree map data structures and operations (#6914)
This PR introduces ordered map data structures, namely `DTreeMap`,
`TreeMap`, `TreeSet` and their `.Raw` variants, into the standard
library. There are still some operations missing that the hash map has.
As of now, the operations are unverified, but the corresponding lemmas
will follow in subsequent PRs. While the tree map has already been
optimized, more micro-optimization will follow as soon as the new code
generator is ready.

---------

Co-authored-by: Paul Reichert <6992158+datokrat@users.noreply.github.com>
2025-02-11 14:47:47 +00:00

235 lines
8 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Markus Himmel, Paul Reichert
-/
prelude
import Std.Data.TreeMap.Basic
/-!
# Tree sets
This file develops the type `Std.TreeSet` of tree sets.
Lemmas about the operations on `Std.Data.TreeSet` will be available in the
module `Std.Data.TreeSet.Lemmas`.
See the module `Std.Data.TreeSet.Raw` for a variant of this type which is safe to use in
nested inductive types.
-/
set_option autoImplicit false
set_option linter.missingDocs true
universe u v w w₂
variable {α : Type u} {cmp : αα → Ordering}
namespace Std
/--
Tree sets.
A tree set stores elements of a certain type in a certain order. It depends on a comparator function
that defines an ordering on the keys and provides efficient order-dependent queries, such as
retrieval of the minimum or maximum.
To ensure that the operations behave as expected, the comparator function `cmp` should satisfy
certain laws that ensure a consistent ordering:
* If `a` is less than (or equal) to `b`, then `b` is greater than (or equal) to `a`
and vice versa (see the `OrientedCmp` typeclass).
* If `a` is less than or equal to `b` and `b` is, in turn, less than or equal to `c`, then `a`
is less than or equal to `c` (see the `TransCmp` typeclass).
Keys for which `cmp a b = Ordering.eq` are considered the same, i.e., there can be only one of them
be contained in a single tree set at the same time.
To avoid expensive copies, users should make sure that the tree set is used linearly.
Internally, the tree sets are represented as size-bounded trees, a type of self-balancing binary
search tree with efficient order statistic lookups.
These tree sets contain a bundled well-formedness invariant, which means that they cannot
be used in nested inductive types. For these use cases, `Std.Data.TreeSet.Raw` and
`Std.Data.TreeSet.Raw.WF` unbundle the invariant from the tree set. When in doubt, prefer
`TreeSet` over `TreeSet.Raw`.
-/
structure TreeSet (α : Type u) (cmp : αα → Ordering := by exact compare) where
/-- Internal implementation detail of the tree map. -/
inner : TreeMap α Unit cmp
namespace TreeSet
/--
Creates a new empty tree set. It is also possible and recommended to
use the empty collection notations `∅` and `{}` to create an empty tree set. `simp` replaces
`empty` with `∅`.
-/
@[inline]
def empty : TreeSet α cmp :=
⟨TreeMap.empty⟩
instance : EmptyCollection (TreeSet α cmp) where
emptyCollection := empty
instance : Inhabited (TreeSet α cmp) where
default := ∅
@[simp]
theorem empty_eq_emptyc : (empty : TreeSet α cmp) = ∅ :=
rfl
/--
Inserts the given element into the set. If the tree set already contains an element that is
equal (with regard to `cmp`) to the given element, then the tree set is returned unchanged.
Note: this non-replacement behavior is true for `TreeSet` and `TreeSet.Raw`.
The `insert` function on `TreeMap`, `DTreeMap`, `TreeMap.Raw` and `DTreeMap.Raw` behaves
differently: it will overwrite an existing mapping.
-/
@[inline]
def insert (l : TreeSet α cmp) (a : α) : TreeSet α cmp :=
⟨l.inner.insertIfNew a ()⟩
instance : Singleton α (TreeSet α cmp) where
singleton e := (∅ : TreeSet α cmp).insert e
instance : Insert α (TreeSet α cmp) where
insert e s := s.insert e
instance : LawfulSingleton α (TreeSet α cmp) where
insert_emptyc_eq _ := rfl
/--
Checks whether an element is present in a set and inserts the element if it was not found.
If the tree set already contains an element that is equal (with regard to `cmp` to the given
element, then the tree set is returned unchanged.
Equivalent to (but potentially faster than) calling `contains` followed by `insert`.
-/
@[inline]
def containsThenInsert (t : TreeSet α cmp) (a : α) : Bool × TreeSet α cmp :=
letI : Ord α := ⟨cmp⟩
let p := t.inner.containsThenInsertIfNew a ()
(p.1, ⟨p.2⟩)
/--
Returns `true` if `a`, or an element equal to `a` according to the comparator `cmp`, is contained
in the set. There is also a `Prop`-valued version of this: `a ∈ t` is equivalent to
`t.contains a = true`.
Observe that this is different behavior than for lists: for lists, `∈` uses `=` and `contains` uses
`==` for equality checks, while for tree sets, both use the given comparator `cmp`.
-/
@[inline]
def contains (l : TreeSet α cmp) (a : α) : Bool :=
l.inner.contains a
instance : Membership α (TreeSet α cmp) where
mem m a := m.contains a
instance {m : TreeSet α cmp} {a : α} : Decidable (a ∈ m) :=
inferInstanceAs <| Decidable (m.contains a)
/-- Returns the number of mappings present in the map. -/
@[inline]
def size (t : TreeSet α cmp) : Nat :=
t.inner.size
/-- Returns `true` if the tree set contains no mappings. -/
@[inline]
def isEmpty (t : TreeSet α cmp) : Bool :=
t.inner.isEmpty
/-- Removes the given key if it exists. -/
@[inline]
def erase (t : TreeSet α cmp) (a : α) : TreeSet α cmp :=
⟨t.inner.erase a⟩
variable {γ δ: Type w} {m : Type w → Type w₂} [Monad m]
/-- Removes all elements from the tree set for which the given function returns `false`. -/
@[inline]
def filter (f : α → Bool) (m : TreeSet α cmp) : TreeSet α cmp :=
⟨m.inner.filter fun a _ => f a⟩
/--
Monadically computes a value by folding the given function over the elements in the tree set in
ascending order.
-/
@[inline]
def foldlM {m δ} [Monad m] (f : δ → (a : α) → m δ) (init : δ) (t : TreeSet α cmp) : m δ :=
t.inner.foldlM (fun c a _ => f c a) init
/-- Folds the given function over the elements of the tree set in ascending order. -/
@[inline]
def foldl (f : δ → (a : α) → δ) (init : δ) (t : TreeSet α cmp) : δ :=
t.inner.foldl (fun c a _ => f c a) init
/-- Carries out a monadic action on each element in the tree set in ascending order. -/
@[inline]
def forM (f : α → m PUnit) (t : TreeSet α cmp) : m PUnit :=
t.inner.forM (fun a _ => f a)
/--
Support for the `for` loop construct in `do` blocks. The iteration happens in ascending
order.
-/
@[inline]
def forIn (f : α → δ → m (ForInStep δ)) (init : δ) (t : TreeSet α cmp) : m δ :=
t.inner.forIn (fun a _ c => f a c) init
instance : ForM m (TreeSet α cmp) α where
forM t f := t.forM f
instance : ForIn m (TreeSet α cmp) α where
forIn m init f := m.forIn (fun a acc => f a acc) init
/-- Check if all elements satisfy the predicate, short-circuiting if a predicate fails. -/
@[inline]
def any (t : TreeSet α cmp) (p : α → Bool) : Bool :=
t.inner.any (fun a _ => p a)
/-- Check if any element satisfies the predicate, short-circuiting if a predicate succeeds. -/
@[inline]
def all (t : TreeSet α cmp) (p : α → Bool) : Bool :=
t.inner.all (fun a _ => p a)
/-- Transforms the tree set into a list of elements in ascending order. -/
@[inline]
def toList (t : TreeSet α cmp) : List α :=
t.inner.inner.inner.foldr (fun l a _ => a :: l) ∅
/-- Transforms the tree set into an array of elements in ascending order. -/
@[inline]
def toArray (t : TreeSet α cmp) : Array α :=
t.foldl (init := ∅) fun acc k => acc.push k
/--
Returns a set that contains all mappings of `t₁` and `t₂.
This function ensures that `t₁` is used linearly.
Hence, as long as `t₁` is unshared, the performance characteristics follow the following imperative
description: Iterate over all mappings in `t₂`, inserting them into `t₁`.
Hence, the runtime of this method scales logarithmically in the size of `t₁` and linearly in the
size of `t₂` as long as `t₁` is unshared.
-/
@[inline]
def merge (t₁ t₂ : TreeSet α cmp) : TreeSet α cmp :=
⟨TreeMap.mergeWith (fun _ _ _ => ()) t₁.inner t₂.inner⟩
/--
Erases multiple items from the tree set by iterating over the given collection and calling erase.
-/
@[inline]
def eraseMany {ρ} [ForIn Id ρ α] (t : TreeSet α cmp) (l : ρ) : TreeSet α cmp :=
⟨t.inner.eraseMany l⟩
instance [Repr α] : Repr (TreeSet α cmp) where
reprPrec m prec := Repr.addAppParen ("TreeSet.ofList " ++ repr m.toList) prec
end TreeSet
end Std