lean4-htt/src/Lean/Data/HashSet.lean
2022-08-31 11:48:57 -07:00

179 lines
6.5 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2019 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
-/
namespace Std
universe u v w
def HashSetBucket (α : Type u) :=
{ b : Array (List α) // b.size > 0 }
def HashSetBucket.update {α : Type u} (data : HashSetBucket α) (i : USize) (d : List α) (h : i.toNat < data.val.size) : HashSetBucket α :=
⟨ data.val.uset i d h,
by erw [Array.size_set]; apply data.property ⟩
structure HashSetImp (α : Type u) where
size : Nat
buckets : HashSetBucket α
def mkHashSetImp {α : Type u} (nbuckets := 8) : HashSetImp α :=
let n := if nbuckets = 0 then 8 else nbuckets
{ size := 0,
buckets :=
⟨ mkArray n [],
by rw [Array.size_mkArray]; cases nbuckets; decide; apply Nat.zero_lt_succ ⟩ }
namespace HashSetImp
variable {α : Type u}
def mkIdx {n : Nat} (h : n > 0) (u : USize) : { u : USize // u.toNat < n } :=
⟨u % n, USize.modn_lt _ h⟩
@[inline] def reinsertAux (hashFn : α → UInt64) (data : HashSetBucket α) (a : α) : HashSetBucket α :=
let ⟨i, h⟩ := mkIdx data.property (hashFn a |>.toUSize)
data.update i (a :: data.val[i]) h
@[inline] def foldBucketsM {δ : Type w} {m : Type w → Type w} [Monad m] (data : HashSetBucket α) (d : δ) (f : δ → α → m δ) : m δ :=
data.val.foldlM (init := d) fun d as => as.foldlM f d
@[inline] def foldBuckets {δ : Type w} (data : HashSetBucket α) (d : δ) (f : δ → α → δ) : δ :=
Id.run $ foldBucketsM data d f
@[inline] def foldM {δ : Type w} {m : Type w → Type w} [Monad m] (f : δ → α → m δ) (d : δ) (h : HashSetImp α) : m δ :=
foldBucketsM h.buckets d f
@[inline] def fold {δ : Type w} (f : δ → α → δ) (d : δ) (m : HashSetImp α) : δ :=
foldBuckets m.buckets d f
@[inline] def forBucketsM {m : Type w → Type w} [Monad m] (data : HashSetBucket α) (f : α → m PUnit) : m PUnit :=
data.val.forM fun as=> as.forM f
@[inline] def forM {m : Type w → Type w} [Monad m] (f : α → m PUnit) (h : HashSetImp α) : m PUnit :=
forBucketsM h.buckets f
def find? [BEq α] [Hashable α] (m : HashSetImp α) (a : α) : Option α :=
match m with
| ⟨_, buckets⟩ =>
let ⟨i, h⟩ := mkIdx buckets.property (hash a |>.toUSize)
buckets.val[i].find? (fun a' => a == a')
def contains [BEq α] [Hashable α] (m : HashSetImp α) (a : α) : Bool :=
match m with
| ⟨_, buckets⟩ =>
let ⟨i, h⟩ := mkIdx buckets.property (hash a |>.toUSize)
buckets.val[i].contains a
def moveEntries [Hashable α] (i : Nat) (source : Array (List α)) (target : HashSetBucket α) : HashSetBucket α :=
if h : i < source.size then
let idx : Fin source.size := ⟨i, h⟩
let es : List α := source.get idx
-- We remove `es` from `source` to make sure we can reuse its memory cells when performing es.foldl
let source := source.set idx []
let target := es.foldl (reinsertAux hash) target
moveEntries (i+1) source target
else
target
termination_by _ i source _ => source.size - i
def expand [Hashable α] (size : Nat) (buckets : HashSetBucket α) : HashSetImp α :=
let nbuckets := buckets.val.size * 2
have : nbuckets > 0 := Nat.mul_pos buckets.property (by decide)
let new_buckets : HashSetBucket α := ⟨mkArray nbuckets [], by rw [Array.size_mkArray]; assumption⟩
{ size := size,
buckets := moveEntries 0 buckets.val new_buckets }
def insert [BEq α] [Hashable α] (m : HashSetImp α) (a : α) : HashSetImp α :=
match m with
| ⟨size, buckets⟩ =>
let ⟨i, h⟩ := mkIdx buckets.property (hash a |>.toUSize)
let bkt := buckets.val[i]
if bkt.contains a
then ⟨size, buckets.update i (bkt.replace a a) h⟩
else
let size' := size + 1
let buckets' := buckets.update i (a :: bkt) h
if size' ≤ buckets.val.size
then { size := size', buckets := buckets' }
else expand size' buckets'
def erase [BEq α] [Hashable α] (m : HashSetImp α) (a : α) : HashSetImp α :=
match m with
| ⟨ size, buckets ⟩ =>
let ⟨i, h⟩ := mkIdx buckets.property (hash a |>.toUSize)
let bkt := buckets.val[i]
if bkt.contains a then ⟨size - 1, buckets.update i (bkt.erase a) h⟩
else m
inductive WellFormed [BEq α] [Hashable α] : HashSetImp α → Prop where
| mkWff : ∀ n, WellFormed (mkHashSetImp n)
| insertWff : ∀ m a, WellFormed m → WellFormed (insert m a)
| eraseWff : ∀ m a, WellFormed m → WellFormed (erase m a)
end HashSetImp
def HashSet (α : Type u) [BEq α] [Hashable α] :=
{ m : HashSetImp α // m.WellFormed }
open HashSetImp
def mkHashSet {α : Type u} [BEq α] [Hashable α] (nbuckets := 8) : HashSet α :=
⟨ mkHashSetImp nbuckets, WellFormed.mkWff nbuckets ⟩
namespace HashSet
@[inline] def empty [BEq α] [Hashable α] : HashSet α :=
mkHashSet
instance [BEq α] [Hashable α] : Inhabited (HashSet α) where
default := mkHashSet
instance [BEq α] [Hashable α] : EmptyCollection (HashSet α) := ⟨mkHashSet⟩
variable {α : Type u} {_ : BEq α} {_ : Hashable α}
@[inline] def insert (m : HashSet α) (a : α) : HashSet α :=
match m with
| ⟨ m, hw ⟩ => ⟨ m.insert a, WellFormed.insertWff m a hw ⟩
@[inline] def erase (m : HashSet α) (a : α) : HashSet α :=
match m with
| ⟨ m, hw ⟩ => ⟨ m.erase a, WellFormed.eraseWff m a hw ⟩
@[inline] def find? (m : HashSet α) (a : α) : Option α :=
match m with
| ⟨ m, _ ⟩ => m.find? a
@[inline] def contains (m : HashSet α) (a : α) : Bool :=
match m with
| ⟨ m, _ ⟩ => m.contains a
@[inline] def foldM {δ : Type w} {m : Type w → Type w} [Monad m] (f : δ → α → m δ) (init : δ) (h : HashSet α) : m δ :=
match h with
| ⟨ h, _ ⟩ => h.foldM f init
@[inline] def fold {δ : Type w} (f : δ → α → δ) (init : δ) (m : HashSet α) : δ :=
match m with
| ⟨ m, _ ⟩ => m.fold f init
@[inline] def forM {m : Type w → Type w} [Monad m] (h : HashSet α) (f : α → m PUnit) : m PUnit :=
match h with
| ⟨h, _⟩ => h.forM f
instance : ForM m (HashSet α) α where
forM := HashSet.forM
@[inline] def size (m : HashSet α) : Nat :=
match m with
| ⟨ {size := sz, ..}, _ ⟩ => sz
@[inline] def isEmpty (m : HashSet α) : Bool :=
m.size = 0
def toList (m : HashSet α) : List α :=
m.fold (init := []) fun r a => a::r
def toArray (m : HashSet α) : Array α :=
m.fold (init := #[]) fun r a => r.push a
def numBuckets (m : HashSet α) : Nat :=
m.val.buckets.val.size