296 lines
11 KiB
Text
296 lines
11 KiB
Text
/-
|
||
Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
Authors: Markus Himmel
|
||
-/
|
||
prelude
|
||
import Std.Data.DHashMap.Basic
|
||
|
||
set_option linter.missingDocs true
|
||
set_option autoImplicit false
|
||
|
||
/-!
|
||
# Hash maps
|
||
|
||
This module develops the type `Std.HashMap` of hash maps. Dependent hash maps are defined in
|
||
`Std.Data.DHashMap`.
|
||
|
||
The operations `map` and `filterMap` on `Std.HashMap` are defined in the module
|
||
`Std.Data.HashMap.AdditionalOperations`.
|
||
|
||
Lemmas about the operations on `Std.HashMap` are available in the
|
||
module `Std.Data.HashMap.Lemmas`.
|
||
|
||
See the module `Std.Data.HashMap.Raw` for a variant of this type which is safe to use in
|
||
nested inductive types.
|
||
-/
|
||
|
||
universe u v w
|
||
|
||
variable {α : Type u} {β : Type v} {_ : BEq α} {_ : Hashable α}
|
||
|
||
namespace Std
|
||
|
||
/--
Hash maps.

The implementation is a simple separate-chaining hash table. A hash map stores a cached size
together with an array of buckets, and every bucket is a linked list of key-value pairs. The number
of buckets is always a power of two. When an insertion makes the number of elements exceed 75% of
the number of buckets, the hash map doubles its size.

Because the table is backed by an `Array`, users should make sure that the hash map is used
linearly; otherwise expensive copies are made.

Keys are compared with `==` (from the `BEq` typeclass) and hashed with `hash` (from the `Hashable`
typeclass). For the operations to behave as expected, `==` should be an equivalence relation and
`a == b` should imply `hash a = hash b` (see also the `EquivBEq` and `LawfulHashable` typeclasses).
Both conditions hold automatically when the `BEq` instance is lawful, i.e., when `a == b` implies
`a = b`.

These hash maps carry a bundled well-formedness invariant, so they cannot be used in nested
inductive types. For such use cases, `Std.Data.HashMap.Raw` and `Std.Data.HashMap.Raw.WF` unbundle
the invariant from the hash map. When in doubt, prefer `HashMap` over `HashMap.Raw`.

Dependent hash maps, in which keys may occur in their values' types, are available as
`Std.Data.DHashMap`.
-/
structure HashMap (α : Type u) (β : Type v) [BEq α] [Hashable α] where
  /-- Internal implementation detail of the hash map -/
  inner : DHashMap α fun (_ : α) => β
|
||
|
||
namespace HashMap
|
||
|
||
@[inline, inherit_doc DHashMap.empty]
def empty [BEq α] [Hashable α] (capacity := 8) : HashMap α β :=
  -- Wrap a fresh dependent hash map created with the requested capacity.
  { inner := DHashMap.empty capacity }
|
||
|
||
-- `∅` denotes the hash map produced by `empty` with its default capacity.
instance [BEq α] [Hashable α] : EmptyCollection (HashMap α β) :=
  ⟨empty⟩
|
||
|
||
-- The default hash map is the empty one.
instance [BEq α] [Hashable α] : Inhabited (HashMap α β) :=
  ⟨∅⟩
|
||
|
||
@[inline, inherit_doc DHashMap.insert]
def insert (m : HashMap α β) (a : α) (b : β) : HashMap α β :=
  -- Delegate to the underlying dependent hash map and re-wrap the result.
  { inner := m.inner.insert a b }
|
||
|
||
-- `{(k, v)}` is the empty map with the single mapping `k ↦ v` inserted.
instance : Singleton (α × β) (HashMap α β) where
  singleton := fun ⟨k, v⟩ => HashMap.empty.insert k v

-- `insert (k, v) s` inserts the mapping `k ↦ v` into `s`.
instance : Insert (α × β) (HashMap α β) where
  insert := fun ⟨k, v⟩ s => s.insert k v

-- Inserting a pair into `∅` is, by definition, the corresponding singleton.
instance : LawfulSingleton (α × β) (HashMap α β) :=
  ⟨fun _ => rfl⟩
|
||
|
||
@[inline, inherit_doc DHashMap.insertIfNew]
def insertIfNew (m : HashMap α β) (a : α) (b : β) : HashMap α β :=
  -- Delegate to the underlying dependent hash map and re-wrap the result.
  { inner := m.inner.insertIfNew a b }
|
||
|
||
@[inline, inherit_doc DHashMap.containsThenInsert]
def containsThenInsert (m : HashMap α β) (a : α) (b : β) : Bool × HashMap α β :=
  -- Run the operation on the underlying map, then re-wrap the updated map.
  match m.inner.containsThenInsert a b with
  | (wasPresent, updated) => (wasPresent, { inner := updated })
|
||
|
||
@[inline, inherit_doc DHashMap.containsThenInsertIfNew]
def containsThenInsertIfNew (m : HashMap α β) (a : α) (b : β) : Bool × HashMap α β :=
  -- Run the operation on the underlying map, then re-wrap the updated map.
  match m.inner.containsThenInsertIfNew a b with
  | (wasPresent, updated) => (wasPresent, { inner := updated })
|
||
|
||
/--
Checks whether a key is present in a map, returning the associated value, and inserts a value for
the key if it was not found.

When the returned value is `some v`, the returned map is unaltered. When it is `none`, the returned
map has a new value inserted.

Equivalent to (but potentially faster than) calling `get?` followed by `insertIfNew`.
-/
@[inline]
def getThenInsertIfNew? (m : HashMap α β) (a : α) (b : β) : Option β × HashMap α β :=
  match DHashMap.Const.getThenInsertIfNew? m.inner a b with
  | (previous, updated) => (previous, { inner := updated })
|
||
|
||
/--
Tries to retrieve the mapping for the given key, returning `none` if no such mapping is present.

The notation `m[a]?` is preferred over calling this function directly.
-/
@[inline]
def get? (m : HashMap α β) (a : α) : Option β :=
  -- Delegate to the constant-value lookup of the underlying dependent hash map.
  DHashMap.Const.get? m.inner a
|
||
|
||
@[deprecated get? "Use `m[a]?` or `m.get? a` instead", inherit_doc get?]
def find? (m : HashMap α β) (a : α) : Option β :=
  -- Deprecated alias kept for compatibility; forwards to `get?`.
  HashMap.get? m a
|
||
|
||
@[inline, inherit_doc DHashMap.contains]
def contains (m : HashMap α β) (a : α) : Bool :=
  -- Delegate the membership test to the underlying dependent hash map.
  DHashMap.contains m.inner a
|
||
|
||
-- `a ∈ m` is defined as membership in the underlying dependent hash map.
instance [BEq α] [Hashable α] : Membership α (HashMap α β) :=
  ⟨fun m a => a ∈ m.inner⟩
|
||
|
||
-- Membership is decidable because it unfolds to membership in `m.inner`, which is decidable.
instance [BEq α] [Hashable α] {m : HashMap α β} {a : α} : Decidable (a ∈ m) :=
  (inferInstance : Decidable (a ∈ m.inner))
|
||
|
||
/--
Retrieves the mapping for the given key. A proof of `a ∈ m` is required, which guarantees that such
a mapping exists.

The notation `m[a]` or `m[a]'h` is preferred over calling this function directly.
-/
@[inline]
def get (m : HashMap α β) (a : α) (h : a ∈ m) : β :=
  -- Delegate to the constant-value lookup of the underlying dependent hash map.
  DHashMap.Const.get m.inner a h
|
||
|
||
@[inline, inherit_doc DHashMap.Const.getD]
def getD (m : HashMap α β) (a : α) (fallback : β) : β :=
  -- Delegate to the constant-value lookup of the underlying dependent hash map.
  DHashMap.Const.getD m.inner a fallback
|
||
|
||
@[deprecated getD, inherit_doc getD]
def findD (m : HashMap α β) (a : α) (fallback : β) : β :=
  -- Deprecated alias kept for compatibility; forwards to `getD`.
  HashMap.getD m a fallback
|
||
|
||
/--
Tries to retrieve the mapping for the given key, panicking if no such mapping is present.

The notation `m[a]!` is preferred over calling this function directly.
-/
@[inline]
def get! [Inhabited β] (m : HashMap α β) (a : α) : β :=
  -- Delegate to the constant-value lookup of the underlying dependent hash map.
  DHashMap.Const.get! m.inner a
|
||
|
||
@[deprecated get! "Use `m[a]!` or `m.get! a` instead", inherit_doc get!]
def find! [Inhabited β] (m : HashMap α β) (a : α) : β :=
  -- Deprecated alias of `get!`. The result type is `β`, matching `get!` and the inherited
  -- documentation; declaring it as `Option β` made the body silently coerce to `some (m.get! a)`.
  m.get! a
|
||
|
||
-- Provides the notations `m[a]`, `m[a]?` and `m[a]!`, backed by `get`, `get?` and `get!`.
instance [BEq α] [Hashable α] : GetElem? (HashMap α β) α β (fun m a => a ∈ m) where
  getElem  m a h := HashMap.get m a h
  getElem? m a   := HashMap.get? m a
  getElem! m a   := HashMap.get! m a
|
||
|
||
@[inline, inherit_doc DHashMap.getKey?]
def getKey? (m : HashMap α β) (a : α) : Option α :=
  -- Delegate to the underlying dependent hash map.
  m.inner.getKey? a
|
||
|
||
@[inline, inherit_doc DHashMap.getKey]
def getKey (m : HashMap α β) (a : α) (h : a ∈ m) : α :=
  -- Delegate to the underlying dependent hash map, passing the membership proof along.
  m.inner.getKey a h
|
||
|
||
@[inline, inherit_doc DHashMap.getKeyD]
def getKeyD (m : HashMap α β) (a : α) (fallback : α) : α :=
  -- Delegate to the underlying dependent hash map.
  m.inner.getKeyD a fallback
|
||
|
||
@[inline, inherit_doc DHashMap.getKey!]
def getKey! [Inhabited α] (m : HashMap α β) (a : α) : α :=
  -- Delegate to the underlying dependent hash map.
  m.inner.getKey! a
|
||
|
||
@[inline, inherit_doc DHashMap.erase]
def erase (m : HashMap α β) (a : α) : HashMap α β :=
  -- Delegate to the underlying dependent hash map and re-wrap the result.
  { inner := DHashMap.erase m.inner a }
|
||
|
||
@[inline, inherit_doc DHashMap.size]
def size (m : HashMap α β) : Nat :=
  -- Delegate to the underlying dependent hash map.
  DHashMap.size m.inner
|
||
|
||
@[inline, inherit_doc DHashMap.isEmpty]
def isEmpty (m : HashMap α β) : Bool :=
  -- Delegate to the underlying dependent hash map.
  DHashMap.isEmpty m.inner
|
||
|
||
section Unverified
|
||
|
||
/-! We currently do not provide lemmas for the functions below. -/
|
||
|
||
@[inline, inherit_doc DHashMap.filter]
def filter (f : α → β → Bool) (m : HashMap α β) : HashMap α β :=
  -- Filter the underlying dependent hash map and re-wrap the result.
  { inner := m.inner.filter f }
|
||
|
||
@[inline, inherit_doc DHashMap.partition]
def partition (f : α → β → Bool) (m : HashMap α β) : HashMap α β × HashMap α β :=
  -- Partition the underlying map, then re-wrap both halves.
  match m.inner.partition f with
  | (fst, snd) => ({ inner := fst }, { inner := snd })
|
||
|
||
@[inline, inherit_doc DHashMap.foldM]
def foldM {m : Type w → Type w} [Monad m] {γ : Type w}
    (f : γ → α → β → m γ) (init : γ) (b : HashMap α β) : m γ :=
  -- Delegate the monadic fold to the underlying dependent hash map.
  b.inner.foldM f init
|
||
|
||
@[inline, inherit_doc DHashMap.fold]
def fold {γ : Type w} (f : γ → α → β → γ) (init : γ) (b : HashMap α β) : γ :=
  -- Delegate the fold to the underlying dependent hash map.
  b.inner.fold f init
|
||
|
||
@[inline, inherit_doc DHashMap.forM]
def forM {m : Type w → Type w} [Monad m]
    (f : (a : α) → β → m PUnit) (b : HashMap α β) : m PUnit :=
  -- Delegate the traversal to the underlying dependent hash map.
  b.inner.forM f
|
||
|
||
@[inline, inherit_doc DHashMap.forIn]
def forIn {m : Type w → Type w} [Monad m] {γ : Type w}
    (f : (a : α) → β → γ → m (ForInStep γ)) (init : γ) (b : HashMap α β) : m γ :=
  -- Delegate the loop to the underlying dependent hash map.
  b.inner.forIn f init
|
||
|
||
-- Traverses the map with `HashMap.forM`, presenting each mapping to the callback as a pair.
instance [BEq α] [Hashable α] {m : Type w → Type w} : ForM m (HashMap α β) (α × β) where
  forM map f := HashMap.forM (fun k v => f (k, v)) map
|
||
|
||
-- Enables `for (k, v) in map do ...` by way of `HashMap.forIn`, passing each mapping as a pair.
instance [BEq α] [Hashable α] {m : Type w → Type w} : ForIn m (HashMap α β) (α × β) where
  forIn map init f := HashMap.forIn (fun k v acc => f (k, v) acc) init map
|
||
|
||
@[inline, inherit_doc DHashMap.Const.toList]
def toList (m : HashMap α β) : List (α × β) :=
  -- Delegate to the underlying dependent hash map.
  m.inner |> DHashMap.Const.toList
|
||
|
||
@[inline, inherit_doc DHashMap.Const.toArray]
def toArray (m : HashMap α β) : Array (α × β) :=
  -- Delegate to the underlying dependent hash map.
  m.inner |> DHashMap.Const.toArray
|
||
|
||
@[inline, inherit_doc DHashMap.keys]
def keys (m : HashMap α β) : List α :=
  -- Delegate to the underlying dependent hash map.
  DHashMap.keys m.inner
|
||
|
||
@[inline, inherit_doc DHashMap.keysArray]
def keysArray (m : HashMap α β) : Array α :=
  -- Delegate to the underlying dependent hash map.
  DHashMap.keysArray m.inner
|
||
|
||
@[inline, inherit_doc DHashMap.values]
def values (m : HashMap α β) : List β :=
  -- Delegate to the underlying dependent hash map.
  DHashMap.values m.inner
|
||
|
||
@[inline, inherit_doc DHashMap.valuesArray]
def valuesArray (m : HashMap α β) : Array β :=
  -- Delegate to the underlying dependent hash map.
  DHashMap.valuesArray m.inner
|
||
|
||
@[inline, inherit_doc DHashMap.Const.insertMany]
def insertMany {ρ : Type w} [ForIn Id ρ (α × β)] (m : HashMap α β) (l : ρ) : HashMap α β :=
  -- Insert into the underlying dependent hash map and re-wrap the result.
  { inner := DHashMap.Const.insertMany m.inner l }
|
||
|
||
@[inline, inherit_doc DHashMap.Const.insertManyUnit]
def insertManyUnit {ρ : Type w} [ForIn Id ρ α] (m : HashMap α Unit) (l : ρ) : HashMap α Unit :=
  -- Insert into the underlying dependent hash map and re-wrap the result.
  { inner := DHashMap.Const.insertManyUnit m.inner l }
|
||
|
||
@[inline, inherit_doc DHashMap.Const.ofList]
def ofList [BEq α] [Hashable α] (l : List (α × β)) : HashMap α β :=
  -- Build the underlying dependent hash map from the list and wrap it.
  { inner := DHashMap.Const.ofList l }
|
||
|
||
/--
Computes the union of the given hash maps, by traversing `m₂` and inserting its elements into `m₁`.
-/
@[inline]
def union [BEq α] [Hashable α] (m₁ m₂ : HashMap α β) : HashMap α β :=
  -- Start from `m₁` and insert every mapping of `m₂` into the accumulator.
  m₂.fold (fun acc k v => acc.insert k v) m₁
|
||
|
||
-- `m₁ ∪ m₂` is `HashMap.union m₁ m₂`.
instance [BEq α] [Hashable α] : Union (HashMap α β) where
  union := HashMap.union
|
||
|
||
@[inline, inherit_doc DHashMap.Const.unitOfList]
def unitOfList [BEq α] [Hashable α] (l : List α) : HashMap α Unit :=
  -- Build the underlying dependent hash map from the list of keys and wrap it.
  { inner := DHashMap.Const.unitOfList l }
|
||
|
||
@[inline, inherit_doc DHashMap.Const.unitOfArray]
def unitOfArray [BEq α] [Hashable α] (l : Array α) : HashMap α Unit :=
  -- Build the underlying dependent hash map from the array of keys and wrap it.
  { inner := DHashMap.Const.unitOfArray l }
|
||
|
||
@[inline, inherit_doc DHashMap.Internal.numBuckets]
def Internal.numBuckets (m : HashMap α β) : Nat :=
  -- Delegate to the underlying dependent hash map.
  m.inner |> DHashMap.Internal.numBuckets
|
||
|
||
-- Renders a hash map as an application of `Std.HashMap.ofList` to the list of its mappings,
-- parenthesized according to the surrounding precedence.
instance [BEq α] [Hashable α] [Repr α] [Repr β] : Repr (HashMap α β) :=
  ⟨fun m prec => Repr.addAppParen ("Std.HashMap.ofList " ++ reprArg m.toList) prec⟩
|
||
|
||
end Unverified
|
||
|
||
end Std.HashMap
|
||
|
||
/--
Groups all elements `x`, `y` in `xs` with `key x == key y` into the same array
`(xs.groupByKey key).get! (key x)`. Groups preserve the relative order of elements in `xs`.
-/
def Array.groupByKey [BEq α] [Hashable α] (key : β → α) (xs : Array β)
    : Std.HashMap α (Array β) := Id.run do
  let mut groups : Std.HashMap α (Array β) := ∅
  for x in xs do
    -- Compute the key once per element instead of once per map operation.
    let k := key x
    -- Start a new group when the key has not been seen yet.
    let group := groups.getD k #[]
    groups := groups.erase k -- make `group` referentially unique
    groups := groups.insert k (group.push x)
  return groups
|