/-
Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Markus Himmel
-/
prelude
import Std.Data.HashMap.Basic

/-!
# Hash sets

This module develops the type `Std.HashSet` of hash sets. A hash set is implemented as a thin
wrapper around a `HashMap α Unit`.

Lemmas about the operations on `Std.HashSet` are available in the module
`Std.Data.HashSet.Lemmas`.

See the module `Std.Data.HashSet.Raw` for a variant of this type which is safe to use in
nested inductive types.
-/

set_option linter.missingDocs true
set_option autoImplicit false

universe u v

variable {α : Type u} {_ : BEq α} {_ : Hashable α}

namespace Std

/--
Hash sets.

This is a simple separate-chaining hash table. The data of the hash set consists of a cached size
and an array of buckets, where each bucket is a linked list of keys. The number of buckets is
always a power of two. The hash set doubles its size upon inserting an element such that the
number of elements is more than 75% of the number of buckets.

The hash table is backed by an `Array`. Users should make sure that the hash set is used linearly
to avoid expensive copies.

The hash set uses `==` (provided by the `BEq` typeclass) to compare elements and `hash` (provided
by the `Hashable` typeclass) to hash them. To ensure that the operations behave as expected, `==`
should be an equivalence relation and `a == b` should imply `hash a = hash b` (see also the
`EquivBEq` and `LawfulHashable` typeclasses). Both of these conditions are automatic if the BEq
instance is lawful, i.e., if `a == b` implies `a = b`.

These hash sets contain a bundled well-formedness invariant, which means that they cannot
be used in nested inductive types. For these use cases, `Std.Data.HashSet.Raw` and
`Std.Data.HashSet.Raw.WF` unbundle the invariant from the hash set. When in doubt, prefer
`HashSet` over `HashSet.Raw`.
-/
structure HashSet (α : Type u) [BEq α] [Hashable α] where
  /-- Internal implementation detail of the hash set. -/
  inner : HashMap α Unit

namespace HashSet

/--
Creates a new empty hash set. The optional parameter `capacity` can be supplied to presize the
set so that it can hold the given number of elements without reallocating. It is also possible to
use the empty collection notations `∅` and `{}` to create an empty hash set with the default
capacity.
-/
@[inline] def empty [BEq α] [Hashable α] (capacity := 8) : HashSet α :=
  ⟨HashMap.empty capacity⟩

instance [BEq α] [Hashable α] : EmptyCollection (HashSet α) where
  emptyCollection := empty

instance [BEq α] [Hashable α] : Inhabited (HashSet α) where
  default := ∅

/--
Inserts the given element into the set. If the hash set already contains an element that is
equal (with regard to `==`) to the given element, then the hash set is returned unchanged.
-/
@[inline] def insert (m : HashSet α) (a : α) : HashSet α :=
  ⟨m.inner.insertIfNew a ()⟩

/--
Checks whether an element is present in a set and inserts the element if it was not found.
If the hash set already contains an element that is equal (with regard to `==`) to the given
element, then the hash set is returned unchanged.

Equivalent to (but potentially faster than) calling `contains` followed by `insert`.
-/
@[inline] def containsThenInsert (m : HashSet α) (a : α) : Bool × HashSet α :=
  let ⟨present, r⟩ := m.inner.containsThenInsertIfNew a ()
  ⟨present, ⟨r⟩⟩

/--
Returns `true` if the given key is present in the set. There is also a `Prop`-valued version of
this: `a ∈ m` is equivalent to `m.contains a = true`.

Observe that this is different behavior than for lists: for lists, `∈` uses `=` and `contains`
uses `==` for comparisons, while for hash sets, both use `==`.
-/
@[inline] def contains (m : HashSet α) (a : α) : Bool :=
  m.inner.contains a

instance [BEq α] [Hashable α] : Membership α (HashSet α) where
  mem a m := a ∈ m.inner

instance [BEq α] [Hashable α] {m : HashSet α} {a : α} : Decidable (a ∈ m) :=
  inferInstanceAs (Decidable (a ∈ m.inner))

/-- Removes the element if it exists. -/
@[inline] def erase (m : HashSet α) (a : α) : HashSet α :=
  ⟨m.inner.erase a⟩

/-- The number of elements present in the set. -/
@[inline] def size (m : HashSet α) : Nat :=
  m.inner.size

/--
Returns `true` if the hash set contains no elements.

Note that if your `BEq` instance is not reflexive or your `Hashable` instance is not
lawful, then it is possible that this function returns `false` even though
`m.contains a = false` for all `a`.
-/
@[inline] def isEmpty (m : HashSet α) : Bool :=
  m.inner.isEmpty

section Unverified

/-! We currently do not provide lemmas for the functions below. -/

/-- Removes all elements from the hash set for which the given function returns `false`. -/
@[inline] def filter (f : α → Bool) (m : HashSet α) : HashSet α :=
  ⟨m.inner.filter fun a _ => f a⟩

/--
Monadically computes a value by folding the given function over the elements in the hash set in
some order.
-/
@[inline] def foldM {m : Type v → Type v} [Monad m] {β : Type v}
    (f : β → α → m β) (init : β) (b : HashSet α) : m β :=
  b.inner.foldM (fun b a _ => f b a) init

/-- Folds the given function over the elements of the hash set in some order. -/
@[inline] def fold {β : Type v} (f : β → α → β) (init : β) (m : HashSet α) : β :=
  m.inner.fold (fun b a _ => f b a) init

/-- Carries out a monadic action on each element in the hash set in some order. -/
@[inline] def forM {m : Type v → Type v} [Monad m] (f : α → m PUnit)
    (b : HashSet α) : m PUnit :=
  b.inner.forM (fun a _ => f a)

/-- Support for the `for` loop construct in `do` blocks. -/
@[inline] def forIn {m : Type v → Type v} [Monad m] {β : Type v}
    (f : α → β → m (ForInStep β)) (init : β) (b : HashSet α) : m β :=
  b.inner.forIn (fun a _ acc => f a acc) init

instance [BEq α] [Hashable α] {m : Type v → Type v} : ForM m (HashSet α) α where
  forM m f := m.forM f

instance [BEq α] [Hashable α] {m : Type v → Type v} : ForIn m (HashSet α) α where
  forIn m init f := m.forIn f init

/-- Transforms the hash set into a list of elements in some order. -/
@[inline] def toList (m : HashSet α) : List α :=
  m.inner.keys

/-- Transforms the hash set into an array of elements in some order. -/
@[inline] def toArray (m : HashSet α) : Array α :=
  m.inner.keysArray

/--
Inserts multiple elements into the hash set. Note that unlike repeatedly calling `insert`, if the
collection contains multiple elements that are equal (with regard to `==`), then the last element
in the collection will be present in the returned hash set.
-/
@[inline] def insertMany {ρ : Type v} [ForIn Id ρ α] (m : HashSet α) (l : ρ) :
    HashSet α :=
  ⟨m.inner.insertManyUnit l⟩

/--
Creates a hash set from a list of elements. Note that unlike repeatedly calling `insert`, if the
collection contains multiple elements that are equal (with regard to `==`), then the last element
in the collection will be present in the returned hash set.
-/
@[inline] def ofList [BEq α] [Hashable α] (l : List α) : HashSet α :=
  ⟨HashMap.unitOfList l⟩

/--
Computes the union of the given hash sets.

The elements of `m₂` are inserted one by one into `m₁` using `insert`. Consequently, if `m₁`
already contains an element that is equal (with regard to `==`) to an element of `m₂`, the
element from `m₁` is the one that is kept.
-/
@[inline] def union [BEq α] [Hashable α] (m₁ m₂ : HashSet α) : HashSet α :=
  m₂.fold (init := m₁) fun acc x => acc.insert x

/--
Returns the number of buckets in the internal representation of the hash set. This function may
be useful for things like monitoring system health, but it should be considered an internal
implementation detail.
-/
def Internal.numBuckets (m : HashSet α) : Nat :=
  HashMap.Internal.numBuckets m.inner

instance [BEq α] [Hashable α] [Repr α] : Repr (HashSet α) where
  reprPrec m prec := Repr.addAppParen ("Std.HashSet.ofList " ++ reprArg m.toList) prec

end Unverified

end HashSet

end Std