This PR adds deprecations for `Lean.HashMap` functions which did not receive deprecation attributes initially. (Requested on [zulip](https://leanprover.zulipchat.com/#narrow/channel/348111-batteries/topic/Unifying.20.60Batteries.2EHashMap.60.20and.20.60Std.2EHashMap.60.2E/near/484113660).) Note that I've used the original deprecation date of 2024-08-08, when the type itself was deprecated; this will not delay removal.
299 lines
13 KiB
Text
299 lines
13 KiB
Text
/-
|
||
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
Author: Leonardo de Moura
|
||
-/
|
||
prelude
|
||
import Init.Data.Nat.Power2
|
||
import Lean.Data.AssocList
|
||
import Std.Data.HashMap.Basic
|
||
import Std.Data.HashMap.Raw
|
||
namespace Lean
|
||
|
||
def HashMapBucket (α : Type u) (β : Type v) :=
|
||
{ b : Array (AssocList α β) // b.size.isPowerOfTwo }
|
||
|
||
def HashMapBucket.update {α : Type u} {β : Type v} (data : HashMapBucket α β) (i : USize) (d : AssocList α β) (h : i.toNat < data.val.size) : HashMapBucket α β :=
|
||
⟨ data.val.uset i d h,
|
||
by erw [Array.size_set]; apply data.property ⟩
|
||
|
||
@[simp] theorem HashMapBucket.size_update {α : Type u} {β : Type v} (data : HashMapBucket α β) (i : USize) (d : AssocList α β)
|
||
(h : i.toNat < data.val.size) : (data.update i d h).val.size = data.val.size := by
|
||
simp [update, Array.uset]
|
||
|
||
structure HashMapImp (α : Type u) (β : Type v) where
|
||
size : Nat
|
||
buckets : HashMapBucket α β
|
||
|
||
private def numBucketsForCapacity (capacity : Nat) : Nat :=
|
||
-- a "load factor" of 0.75 is the usual standard for hash maps
|
||
capacity * 4 / 3
|
||
|
||
def mkHashMapImp {α : Type u} {β : Type v} (capacity := 8) : HashMapImp α β :=
|
||
{ size := 0
|
||
buckets :=
|
||
⟨mkArray (numBucketsForCapacity capacity).nextPowerOfTwo AssocList.nil,
|
||
by simp; apply Nat.isPowerOfTwo_nextPowerOfTwo⟩ }
|
||
|
||
namespace HashMapImp
|
||
variable {α : Type u} {β : Type v}
|
||
|
||
/- Remark: we use a C implementation because this function is performance critical. -/
|
||
@[extern "lean_hashmap_mk_idx"]
|
||
private def mkIdx {sz : Nat} (hash : UInt64) (h : sz.isPowerOfTwo) : { u : USize // u.toNat < sz } :=
|
||
-- TODO: avoid `if` in the reference implementation
|
||
let u := hash.toUSize &&& (sz.toUSize - 1)
|
||
if h' : u.toNat < sz then
|
||
⟨u, h'⟩
|
||
else
|
||
⟨0, by simp; apply Nat.pos_of_isPowerOfTwo h⟩
|
||
|
||
@[inline] def reinsertAux (hashFn : α → UInt64) (data : HashMapBucket α β) (a : α) (b : β) : HashMapBucket α β :=
|
||
let ⟨i, h⟩ := mkIdx (hashFn a) data.property
|
||
data.update i (AssocList.cons a b data.val[i]) h
|
||
|
||
@[inline] def foldBucketsM {δ : Type w} {m : Type w → Type w} [Monad m] (data : HashMapBucket α β) (d : δ) (f : δ → α → β → m δ) : m δ :=
|
||
data.val.foldlM (init := d) fun d b => b.foldlM f d
|
||
|
||
@[inline] def foldBuckets {δ : Type w} (data : HashMapBucket α β) (d : δ) (f : δ → α → β → δ) : δ :=
|
||
Id.run $ foldBucketsM data d f
|
||
|
||
@[inline] def foldM {δ : Type w} {m : Type w → Type w} [Monad m] (f : δ → α → β → m δ) (d : δ) (h : HashMapImp α β) : m δ :=
|
||
foldBucketsM h.buckets d f
|
||
|
||
@[inline] def fold {δ : Type w} (f : δ → α → β → δ) (d : δ) (m : HashMapImp α β) : δ :=
|
||
foldBuckets m.buckets d f
|
||
|
||
@[inline] def forBucketsM {m : Type w → Type w} [Monad m] (data : HashMapBucket α β) (f : α → β → m PUnit) : m PUnit :=
|
||
data.val.forM fun b => b.forM f
|
||
|
||
@[inline] def forM {m : Type w → Type w} [Monad m] (f : α → β → m PUnit) (h : HashMapImp α β) : m PUnit :=
|
||
forBucketsM h.buckets f
|
||
|
||
def findEntry? [BEq α] [Hashable α] (m : HashMapImp α β) (a : α) : Option (α × β) :=
|
||
match m with
|
||
| ⟨_, buckets⟩ =>
|
||
let ⟨i, h⟩ := mkIdx (hash a) buckets.property
|
||
buckets.val[i].findEntry? a
|
||
|
||
def find? [beq : BEq α] [Hashable α] (m : HashMapImp α β) (a : α) : Option β :=
|
||
match m with
|
||
| ⟨_, buckets⟩ =>
|
||
let ⟨i, h⟩ := mkIdx (hash a) buckets.property
|
||
buckets.val[i].find? a
|
||
|
||
def contains [BEq α] [Hashable α] (m : HashMapImp α β) (a : α) : Bool :=
|
||
match m with
|
||
| ⟨_, buckets⟩ =>
|
||
let ⟨i, h⟩ := mkIdx (hash a) buckets.property
|
||
buckets.val[i].contains a
|
||
|
||
def moveEntries [Hashable α] (i : Nat) (source : Array (AssocList α β)) (target : HashMapBucket α β) : HashMapBucket α β :=
|
||
if h : i < source.size then
|
||
let es : AssocList α β := source[i]
|
||
-- We remove `es` from `source` to make sure we can reuse its memory cells when performing es.foldl
|
||
let source := source.set i AssocList.nil
|
||
let target := es.foldl (reinsertAux hash) target
|
||
moveEntries (i+1) source target
|
||
else target
|
||
termination_by source.size - i
|
||
decreasing_by simp_wf; decreasing_trivial_pre_omega
|
||
|
||
def expand [Hashable α] (size : Nat) (buckets : HashMapBucket α β) : HashMapImp α β :=
|
||
let bucketsNew : HashMapBucket α β := ⟨
|
||
mkArray (buckets.val.size * 2) AssocList.nil,
|
||
by simp; apply Nat.mul2_isPowerOfTwo_of_isPowerOfTwo buckets.property
|
||
⟩
|
||
{ size := size,
|
||
buckets := moveEntries 0 buckets.val bucketsNew }
|
||
|
||
@[inline] def insert [beq : BEq α] [Hashable α] (m : HashMapImp α β) (a : α) (b : β) : HashMapImp α β × Bool :=
|
||
match m with
|
||
| ⟨size, buckets⟩ =>
|
||
let ⟨i, h⟩ := mkIdx (hash a) buckets.property
|
||
let bkt := buckets.val[i]
|
||
if bkt.contains a then
|
||
-- make sure `bkt` is used linearly in the following call to `replace`
|
||
let buckets' := buckets.update i .nil h
|
||
(⟨size, buckets'.update i (bkt.replace a b) (by simpa [buckets'])⟩, true)
|
||
else
|
||
let size' := size + 1
|
||
let buckets' := buckets.update i (AssocList.cons a b bkt) h
|
||
if numBucketsForCapacity size' ≤ buckets.val.size then
|
||
({ size := size', buckets := buckets' }, false)
|
||
else
|
||
(expand size' buckets', false)
|
||
|
||
@[inline] def insertIfNew [beq : BEq α] [Hashable α] (m : HashMapImp α β) (a : α) (b : β) : HashMapImp α β × Option β :=
|
||
match m with
|
||
| ⟨size, buckets⟩ =>
|
||
let ⟨i, h⟩ := mkIdx (hash a) buckets.property
|
||
let bkt := buckets.val[i]
|
||
if let some b := bkt.find? a then
|
||
(⟨size, buckets⟩, some b)
|
||
else
|
||
let size' := size + 1
|
||
let buckets' := buckets.update i (AssocList.cons a b bkt) h
|
||
if numBucketsForCapacity size' ≤ buckets.val.size then
|
||
({ size := size', buckets := buckets' }, none)
|
||
else
|
||
(expand size' buckets', none)
|
||
|
||
|
||
def erase [BEq α] [Hashable α] (m : HashMapImp α β) (a : α) : HashMapImp α β :=
|
||
match m with
|
||
| ⟨ size, buckets ⟩ =>
|
||
let ⟨i, h⟩ := mkIdx (hash a) buckets.property
|
||
let bkt := buckets.val[i]
|
||
if bkt.contains a then
|
||
-- make sure `bkt` is used linearly in the following call to `erase`
|
||
let buckets' := buckets.update i .nil h
|
||
⟨size - 1, buckets'.update i (bkt.erase a) (by simpa [buckets'])⟩
|
||
else
|
||
⟨size, buckets⟩
|
||
|
||
inductive WellFormed [BEq α] [Hashable α] : HashMapImp α β → Prop where
|
||
| mkWff : ∀ n, WellFormed (mkHashMapImp n)
|
||
| insertWff : ∀ m a b, WellFormed m → WellFormed (insert m a b |>.1)
|
||
| insertIfNewWff : ∀ m a b, WellFormed m → WellFormed (insertIfNew m a b |>.1)
|
||
| eraseWff : ∀ m a, WellFormed m → WellFormed (erase m a)
|
||
|
||
end HashMapImp
|
||
|
||
def HashMap (α : Type u) (β : Type v) [BEq α] [Hashable α] :=
|
||
{ m : HashMapImp α β // m.WellFormed }
|
||
|
||
open Lean.HashMapImp
|
||
|
||
def mkHashMap {α : Type u} {β : Type v} [BEq α] [Hashable α] (capacity := 8) : HashMap α β :=
|
||
⟨ mkHashMapImp capacity, WellFormed.mkWff capacity ⟩
|
||
|
||
namespace HashMap
|
||
instance [BEq α] [Hashable α] : Inhabited (HashMap α β) where
|
||
default := mkHashMap
|
||
|
||
instance [BEq α] [Hashable α] : EmptyCollection (HashMap α β) := ⟨mkHashMap⟩
|
||
|
||
@[inline] def empty [BEq α] [Hashable α] : HashMap α β :=
|
||
mkHashMap
|
||
|
||
variable {α : Type u} {β : Type v} {_ : BEq α} {_ : Hashable α}
|
||
|
||
def insert (m : HashMap α β) (a : α) (b : β) : HashMap α β :=
|
||
match m with
|
||
| ⟨ m, hw ⟩ =>
|
||
match h:m.insert a b with
|
||
| (m', _) => ⟨ m', by have aux := WellFormed.insertWff m a b hw; rw [h] at aux; assumption ⟩
|
||
|
||
/-- Similar to `insert`, but also returns a Boolean flag indicating whether an existing entry has been replaced with `a -> b`. -/
|
||
def insert' (m : HashMap α β) (a : α) (b : β) : HashMap α β × Bool :=
|
||
match m with
|
||
| ⟨ m, hw ⟩ =>
|
||
match h:m.insert a b with
|
||
| (m', replaced) => (⟨ m', by have aux := WellFormed.insertWff m a b hw; rw [h] at aux; assumption ⟩, replaced)
|
||
|
||
/--
|
||
Similar to `insert`, but returns `some old` if the map already had an entry `α → old`.
|
||
If the result is `some old`, the resulting map is equal to `m`. -/
|
||
def insertIfNew (m : HashMap α β) (a : α) (b : β) : HashMap α β × Option β :=
|
||
match m with
|
||
| ⟨ m, hw ⟩ =>
|
||
match h:m.insertIfNew a b with
|
||
| (m', old) => (⟨ m', by have aux := WellFormed.insertIfNewWff m a b hw; rw [h] at aux; assumption ⟩, old)
|
||
|
||
@[inline] def erase (m : HashMap α β) (a : α) : HashMap α β :=
|
||
match m with
|
||
| ⟨ m, hw ⟩ => ⟨ m.erase a, WellFormed.eraseWff m a hw ⟩
|
||
|
||
@[inline] def findEntry? (m : HashMap α β) (a : α) : Option (α × β) :=
|
||
match m with
|
||
| ⟨ m, _ ⟩ => m.findEntry? a
|
||
|
||
@[inline] def find? (m : HashMap α β) (a : α) : Option β :=
|
||
match m with
|
||
| ⟨ m, _ ⟩ => m.find? a
|
||
|
||
@[inline] def findD (m : HashMap α β) (a : α) (b₀ : β) : β :=
|
||
(m.find? a).getD b₀
|
||
|
||
@[inline] def find! [Inhabited β] (m : HashMap α β) (a : α) : β :=
|
||
match m.find? a with
|
||
| some b => b
|
||
| none => panic! "key is not in the map"
|
||
|
||
instance : GetElem (HashMap α β) α (Option β) fun _ _ => True where
|
||
getElem m k _ := m.find? k
|
||
|
||
@[inline] def contains (m : HashMap α β) (a : α) : Bool :=
|
||
match m with
|
||
| ⟨ m, _ ⟩ => m.contains a
|
||
|
||
@[inline] def foldM {δ : Type w} {m : Type w → Type w} [Monad m] (f : δ → α → β → m δ) (init : δ) (h : HashMap α β) : m δ :=
|
||
match h with
|
||
| ⟨ h, _ ⟩ => h.foldM f init
|
||
|
||
@[inline] def fold {δ : Type w} (f : δ → α → β → δ) (init : δ) (m : HashMap α β) : δ :=
|
||
match m with
|
||
| ⟨ m, _ ⟩ => m.fold f init
|
||
|
||
@[inline] def forM {m : Type w → Type w} [Monad m] (f : α → β → m PUnit) (h : HashMap α β) : m PUnit :=
|
||
match h with
|
||
| ⟨ h, _ ⟩ => h.forM f
|
||
|
||
@[inline] def size (m : HashMap α β) : Nat :=
|
||
match m with
|
||
| ⟨ {size := sz, ..}, _ ⟩ => sz
|
||
|
||
@[inline] def isEmpty (m : HashMap α β) : Bool :=
|
||
m.size = 0
|
||
|
||
def toList (m : HashMap α β) : List (α × β) :=
|
||
m.fold (init := []) fun r k v => (k, v)::r
|
||
|
||
def toArray (m : HashMap α β) : Array (α × β) :=
|
||
m.fold (init := #[]) fun r k v => r.push (k, v)
|
||
|
||
def numBuckets (m : HashMap α β) : Nat :=
|
||
m.val.buckets.val.size
|
||
|
||
variable [BEq α] [Hashable α]
|
||
|
||
/-- Builds a `HashMap` from a list of key-value pairs. Values of duplicated keys are replaced by their respective last occurrences. -/
|
||
def ofList (l : List (α × β)) : HashMap α β :=
|
||
l.foldl (init := HashMap.empty) (fun m p => m.insert p.fst p.snd)
|
||
|
||
/-- Variant of `ofList` which accepts a function that combines values of duplicated keys. -/
|
||
def ofListWith (l : List (α × β)) (f : β → β → β) : HashMap α β :=
|
||
l.foldl (init := HashMap.empty)
|
||
(fun m p =>
|
||
match m.find? p.fst with
|
||
| none => m.insert p.fst p.snd
|
||
| some v => m.insert p.fst $ f v p.snd)
|
||
|
||
attribute [deprecated Std.HashMap (since := "2024-08-08")] HashMap
|
||
attribute [deprecated Std.HashMap.Raw (since := "2024-08-08")] HashMapImp
|
||
attribute [deprecated Std.HashMap.Raw.empty (since := "2024-08-08")] mkHashMapImp
|
||
attribute [deprecated Std.HashMap.empty (since := "2024-08-08")] mkHashMap
|
||
attribute [deprecated Std.HashMap.empty (since := "2024-08-08")] HashMap.empty
|
||
attribute [deprecated Std.HashMap.ofList (since := "2024-08-08")] HashMap.ofList
|
||
|
||
attribute [deprecated Std.HashMap.insert (since := "2024-08-08")] HashMap.insert
|
||
attribute [deprecated Std.HashMap.containsThenInsert (since := "2024-08-08")] HashMap.insert'
|
||
attribute [deprecated Std.HashMap.insertIfNew (since := "2024-08-08")] HashMap.insertIfNew
|
||
attribute [deprecated Std.HashMap.erase (since := "2024-08-08")] HashMap.erase
|
||
attribute [deprecated "Use `m[k]?` instead." (since := "2024-08-08")] HashMap.findEntry?
|
||
attribute [deprecated "Use `m[k]?` instead." (since := "2024-08-08")] HashMap.find?
|
||
attribute [deprecated "Use `m[k]?.getD` instead." (since := "2024-08-08")] HashMap.findD
|
||
attribute [deprecated "Use `m[k]!` instead." (since := "2024-08-08")] HashMap.find!
|
||
attribute [deprecated Std.HashMap.contains (since := "2024-08-08")] HashMap.contains
|
||
attribute [deprecated Std.HashMap.foldM (since := "2024-08-08")] HashMap.foldM
|
||
attribute [deprecated Std.HashMap.fold (since := "2024-08-08")] HashMap.fold
|
||
attribute [deprecated Std.HashMap.forM (since := "2024-08-08")] HashMap.forM
|
||
attribute [deprecated Std.HashMap.size (since := "2024-08-08")] HashMap.size
|
||
attribute [deprecated Std.HashMap.isEmpty (since := "2024-08-08")] HashMap.isEmpty
|
||
attribute [deprecated Std.HashMap.toList (since := "2024-08-08")] HashMap.toList
|
||
attribute [deprecated Std.HashMap.toArray (since := "2024-08-08")] HashMap.toArray
|
||
attribute [deprecated "Deprecateed without a replacement." (since := "2024-08-08")] HashMap.numBuckets
|
||
attribute [deprecated "Deprecateed without a replacement." (since := "2024-08-08")] HashMap.ofListWith
|
||
|
||
end Lean.HashMap
|