feat(library/init/data/hashmap): hash function produces an uint32 instead of nat

Most efficient hash functions use uint32/uint64 and produce values
that do not fit in out small nat representation. Thus, GMP big numbers
would have to be created.
This commit is contained in:
Leonardo de Moura 2018-05-03 17:37:45 -07:00
parent 7aeae54522
commit af4f831a9f
7 changed files with 69 additions and 52 deletions

View file

@ -42,6 +42,7 @@ reserve infixl ` - `:65
reserve infixl ` * `:70
reserve infixl ` / `:70
reserve infixl ` % `:70
reserve infixl ` %ₙ `:70
reserve prefix `-`:100
reserve infixr ` ^ `:80
@ -324,6 +325,7 @@ class has_sub (α : Type u) := (sub : ααα)
class has_div (α : Type u) := (div : ααα)
class has_dvd (α : Type u) := (dvd : αα → Prop)
class has_mod (α : Type u) := (mod : ααα)
class has_modn (α : Type u) := (modn : α → nat → α)
class has_le (α : Type u) := (le : αα → Prop)
class has_lt (α : Type u) := (lt : αα → Prop)
class has_append (α : Type u) := (append : ααα)
@ -359,6 +361,7 @@ infix - := has_sub.sub
infix / := has_div.div
infix := has_dvd.dvd
infix % := has_mod.mod
infix %ₙ := has_modn.modn
prefix - := has_neg.neg
infix <= := has_le.le
infix ≤ := has_le.le

View file

@ -56,11 +56,19 @@ def write' (a : array α) (i : nat) (v : α) : array α :=
if h : i < a.sz then a.write ⟨i, h⟩ v else a
/- TODO: add builtin -/
def uread [inhabited α] (a : array α) (i : usize) : α :=
def uread (a : array α) (i : uint32) (h : i.val < a.sz) : α :=
a.read ⟨i.val, h⟩
/- TODO: add builtin -/
def uwrite (a : array α) (i : uint32) (v : α) (h : i.val < a.sz) : array α :=
a.write ⟨i.val, h⟩ v
/- TODO: add builtin -/
def uread' [inhabited α] (a : array α) (i : uint32) : α :=
if h : i.val < a.sz then a.read ⟨i.val, h⟩ else default α
/- TODO: add builtin -/
def uwrite (a : array α) (i : usize) (v : α) : array α :=
def uwrite' (a : array α) (i : uint32) (v : α) : array α :=
if h : i.val < a.sz then a.write ⟨i.val, h⟩ v else a
/- TODO: mark as builtin -/

View file

@ -51,7 +51,7 @@ protected def sub : fin n → fin n → fin n
| ⟨a, h⟩ ⟨b, _⟩ := ⟨(a + (n - b)) % n, mlt h⟩
/-
Remark: sub/mod/div can be defined without using (% n), but
Remark: mod/div/modn can be defined without using (% n), but
we are trying to minimize the number of nat theorems
needed to boostrap Lean.
-/
@ -62,6 +62,9 @@ protected def mod : fin n → fin n → fin n
protected def div : fin n → fin n → fin n
| ⟨a, h⟩ ⟨b, _⟩ := ⟨(a / b) % n, mlt h⟩
protected def modn : fin n → nat → fin n
| ⟨a, h⟩ m := ⟨(a % m) % n, mlt h⟩
instance : has_zero (fin (succ n)) := ⟨⟨0, succ_pos n⟩⟩
instance : has_one (fin (succ n)) := ⟨of_nat 1⟩
instance : has_add (fin n) := ⟨fin.add⟩
@ -69,6 +72,7 @@ instance : has_sub (fin n) := ⟨fin.sub⟩
instance : has_mul (fin n) := ⟨fin.mul⟩
instance : has_mod (fin n) := ⟨fin.mod⟩
instance : has_div (fin n) := ⟨fin.div⟩
instance : has_modn (fin n) := ⟨fin.modn⟩
theorem eq_of_veq : ∀ {i j : fin n}, (val i) = (val j) → i = j
| ⟨iv, ilt₁⟩ ⟨.(iv), ilt₂⟩ rfl := rfl
@ -82,6 +86,9 @@ theorem ne_of_vne {i j : fin n} (h : val i ≠ val j) : i ≠ j :=
theorem vne_of_ne {i j : fin n} (h : i ≠ j) : val i ≠ val j :=
λ h', absurd (eq_of_veq h') h
theorem modn_lt : ∀ {m : nat} (i : fin n), m > 0 → (i %ₙ m).val < m
| m ⟨a, h⟩ hp := nat.lt_of_le_of_lt (mod_le _ _) (mod_lt _ hp)
end fin
open fin

View file

@ -10,10 +10,10 @@ universes u v w
def bucket_array (α : Type u) (β : α → Type v) :=
{ b : array (list (Σ a, β a)) // b.sz > 0 }
def bucket_array.write {α : Type u} {β : α → Type v} (data : bucket_array α β) (i : fin data.val.sz) (d : list (Σ a, β a)) : bucket_array α β :=
⟨ data.val.write i d,
calc (data.val.write i d).sz = data.val.sz : array.sz_write_eq _ _ _
... > 0 : data.property ⟩
def bucket_array.uwrite {α : Type u} {β : α → Type v} (data : bucket_array α β) (i : uint32) (d : list (Σ a, β a)) (h : i.val < data.val.sz) : bucket_array α β :=
⟨ data.val.uwrite i d h,
calc (data.val.uwrite i d h).sz = data.val.sz : array.sz_write_eq _ _ _
... > 0 : data.property ⟩
structure hashmap_imp (α : Type u) (β : α → Type v) :=
(size : nat)
@ -34,12 +34,12 @@ let n := if nbuckets = 0 then 8 else nbuckets in
namespace hashmap_imp
variables {α : Type u} {β : α → Type v}
def mk_idx {n : nat} (h : n > 0) (i : nat) : fin n :=
i % n, nat.mod_lt _ h⟩
def mk_idx {n : nat} (h : n > 0) (u : uint32) : { u : uint32 // u.val < n } :=
u %ₙ n, fin.modn_lt _ h⟩
def reinsert_aux (hash_fn : αnat) (data : bucket_array α β) (a : α) (b : β a) : bucket_array α β :=
let bidx := mk_idx data.property (hash_fn a) in
data.write bidx (⟨a, b⟩ :: data.val.read bidx)
def reinsert_aux (hash_fn : αuint32) (data : bucket_array α β) (a : α) (b : β a) : bucket_array α β :=
let ⟨i, h⟩ := mk_idx data.property (hash_fn a) in
data.uwrite i (⟨a, b⟩ :: data.val.uread i h) h
def fold_buckets {δ : Type w} (data : bucket_array α β) (d : δ) (f : δ → Π a, β a → δ) : δ :=
data.val.foldl d (λ b d, b.foldl (λ r (p : Σ a, β a), f r p.1 p.2) d)
@ -52,10 +52,11 @@ def find_aux [decidable_eq α] (a : α) : list (Σ a, β a) → option (β a)
def contains_aux [decidable_eq α] (a : α) (l : list (Σ a, β a)) : bool :=
(find_aux a l).is_some
def find [decidable_eq α] (hash_fn : αnat) (m : hashmap_imp α β) (a : α) : option (β a) :=
def find [decidable_eq α] (hash_fn : αuint32) (m : hashmap_imp α β) (a : α) : option (β a) :=
match m with
| ⟨_, buckets, nz⟩ :=
find_aux a (buckets.read (mk_idx nz (hash_fn a)))
| ⟨_, buckets⟩ :=
let ⟨i, h⟩ := mk_idx buckets.property (hash_fn a) in
find_aux a (buckets.val.uread i h)
def fold {δ : Type w} (m : hashmap_imp α β) (d : δ) (f : δ → Π a, β a → δ) : δ :=
fold_buckets m.buckets d f
@ -68,15 +69,15 @@ def erase_aux [decidable_eq α] (a : α) : list (Σ a, β a) → list (Σ a, β
| [] := []
| (⟨a', b'⟩::t) := if a' = a then t else ⟨a', b'⟩ :: erase_aux t
def insert [decidable_eq α] (hash_fn : αnat) (m : hashmap_imp α β) (a : α) (b : β a) : hashmap_imp α β :=
def insert [decidable_eq α] (hash_fn : αuint32) (m : hashmap_imp α β) (a : α) (b : β a) : hashmap_imp α β :=
match m with
| ⟨size, buckets⟩ :=
let bidx := mk_idx buckets.property (hash_fn a) in
let bkt := buckets.val.read bidx in
let ⟨i, h⟩ := mk_idx buckets.property (hash_fn a) in
let bkt := buckets.val.uread i h in
if contains_aux a bkt
then ⟨size, buckets.write bidx (replace_aux a b bkt)
then ⟨size, buckets.uwrite i (replace_aux a b bkt) h
else let size' := size + 1 in
let buckets' := buckets.write bidx (⟨a, b⟩::bkt) in
let buckets' := buckets.uwrite i (⟨a, b⟩::bkt) h in
if size' <= buckets.val.sz
then ⟨size', buckets'⟩
else let nbuckets' := buckets.val.sz * 2 in
@ -84,32 +85,31 @@ match m with
⟨ size',
fold_buckets buckets' ⟨mk_array nbuckets' [], nz'⟩ (reinsert_aux hash_fn) ⟩
def erase [decidable_eq α] (hash_fn : αnat) (m : hashmap_imp α β) (a : α) : hashmap_imp α β :=
def erase [decidable_eq α] (hash_fn : αuint32) (m : hashmap_imp α β) (a : α) : hashmap_imp α β :=
match m with
| ⟨ size, buckets ⟩ :=
let bidx := mk_idx buckets.property (hash_fn a) in
let bkt := buckets.val.read bidx in
if contains_aux a bkt
then ⟨size - 1, buckets.write bidx $ erase_aux a bkt⟩
let ⟨i, h⟩ := mk_idx buckets.property (hash_fn a) in
let bkt := buckets.val.uread i h in
if contains_aux a bkt then ⟨size - 1, buckets.uwrite i (erase_aux a bkt) h⟩
else m
inductive well_formed [decidable_eq α] (hash_fn : αnat) : hashmap_imp α β → Prop
inductive well_formed [decidable_eq α] (hash_fn : αuint32) : hashmap_imp α β → Prop
| mk_wff : ∀ n, well_formed (mk_hashmap_imp n)
| insert_wff : ∀ m a b, well_formed m → well_formed (insert hash_fn m a b)
| erase_wff : ∀ m a, well_formed m → well_formed (erase hash_fn m a)
end hashmap_imp
def d_hashmap (α : Type u) (β : α → Type v) [decidable_eq α] (h : αnat) :=
def d_hashmap (α : Type u) (β : α → Type v) [decidable_eq α] (h : αuint32) :=
{ m : hashmap_imp α β // m.well_formed h }
open hashmap_imp
def mk_d_hashmap {α : Type u} {β : α → Type v} [decidable_eq α] (h : αnat) (nbuckets := 8) : d_hashmap α β h :=
def mk_d_hashmap {α : Type u} {β : α → Type v} [decidable_eq α] (h : αuint32) (nbuckets := 8) : d_hashmap α β h :=
⟨ mk_hashmap_imp nbuckets, well_formed.mk_wff h nbuckets ⟩
namespace d_hashmap
variables {α : Type u} {β : α → Type v} [decidable_eq α] {h : αnat}
variables {α : Type u} {β : α → Type v} [decidable_eq α] {h : αuint32}
def insert (m : d_hashmap α β h) (a : α) (b : β a) : d_hashmap α β h :=
match m with
@ -144,14 +144,14 @@ m.size = 0
end d_hashmap
def hashmap (α : Type u) (β : Type v) [decidable_eq α] (h : αnat) :=
def hashmap (α : Type u) (β : Type v) [decidable_eq α] (h : αuint32) :=
d_hashmap α (λ _, β) h
def mk_hashmap {α : Type u} {β : Type v} [decidable_eq α] (h : αnat) (nbuckets := 8) : hashmap α β h :=
def mk_hashmap {α : Type u} {β : Type v} [decidable_eq α] (h : αuint32) (nbuckets := 8) : hashmap α β h :=
mk_d_hashmap h nbuckets
namespace hashmap
variables {α : Type u} {β : Type v} [decidable_eq α] {h : αnat}
variables {α : Type u} {β : Type v} [decidable_eq α] {h : αuint32}
@[inline] def insert (m : hashmap α β h) (a : α) (b : β) : hashmap α β h :=
d_hashmap.insert m a b

View file

@ -93,4 +93,11 @@ mod.induction_on x y
have heq : x % y = x, from mod_eq_of_lt hgt,
heq.symm ▸ hgt))
theorem mod_le (x y : ) : x % y ≤ x :=
or.elim (nat.lt_or_ge x y)
(λ h₁ : x < y, (mod_eq_of_lt h₁).symm ▸ nat.le_refl _)
(λ h₁ : x ≥ y, or.elim (eq_zero_or_pos y)
(λ h₂ : y = 0, h₂.symm ▸ (nat.mod_zero x).symm ▸ nat.le_refl _)
(λ h₂ : y > 0, nat.le_trans (nat.le_of_lt (nat.mod_lt _ h₂)) h₁))
end nat

View file

@ -31,6 +31,7 @@ instance : has_add uint16 := ⟨fin.add⟩
instance : has_sub uint16 := ⟨fin.sub⟩
instance : has_mul uint16 := ⟨fin.mul⟩
instance : has_mod uint16 := ⟨fin.mod⟩
instance : has_modn uint16 := ⟨fin.modn⟩
instance : has_div uint16 := ⟨fin.div⟩
instance : has_lt uint16 := ⟨fin.lt⟩
instance : has_le uint16 := ⟨fin.le⟩
@ -41,6 +42,7 @@ instance : has_add uint32 := ⟨fin.add⟩
instance : has_sub uint32 := ⟨fin.sub⟩
instance : has_mul uint32 := ⟨fin.mul⟩
instance : has_mod uint32 := ⟨fin.mod⟩
instance : has_modn uint32 := ⟨fin.modn⟩
instance : has_div uint32 := ⟨fin.div⟩
instance : has_lt uint32 := ⟨fin.lt⟩
instance : has_le uint32 := ⟨fin.le⟩
@ -51,24 +53,14 @@ instance : has_add uint64 := ⟨fin.add⟩
instance : has_sub uint64 := ⟨fin.sub⟩
instance : has_mul uint64 := ⟨fin.mul⟩
instance : has_mod uint64 := ⟨fin.mod⟩
instance : has_modn uint64 := ⟨fin.modn⟩
instance : has_div uint64 := ⟨fin.div⟩
instance : has_lt uint64 := ⟨fin.lt⟩
instance : has_le uint64 := ⟨fin.le⟩
def uint16.of_nat (n : nat) : uint16 :=
fin.of_nat n
def uint16.to_nat (u : uint16) : nat :=
fin.val u
def uint32.of_nat (n : nat) : uint32 :=
fin.of_nat n
def uint32.to_nat (u : uint32) : nat :=
fin.val u
def uint64.of_nat (n : nat) : uint64 :=
fin.of_nat n
def uint64.to_nat (u : uint64) : nat :=
fin.val u
def uint16.of_nat (n : nat) : uint16 := fin.of_nat n
def uint16.to_nat (u : uint16) : nat := fin.val u
def uint32.of_nat (n : nat) : uint32 := fin.of_nat n
def uint32.to_nat (u : uint32) : nat := fin.val u
def uint64.of_nat (n : nat) : uint64 := fin.of_nat n
def uint64.to_nat (u : uint64) : nat := fin.val u

View file

@ -16,16 +16,16 @@ structure disjoint_set.node (α : Type u) :=
(find : α)
(rank : nat := 0)
structure disjoint_set (α : Type u) [decidable_eq α] (h : αnat) : Type u :=
structure disjoint_set (α : Type u) [decidable_eq α] (h : αuint32) : Type u :=
(map : hashmap α (disjoint_set.node α) h)
variables {α : Type u}
def mk_disjoint_set [decidable_eq α] (h : αnat) : disjoint_set α h :=
def mk_disjoint_set [decidable_eq α] (h : αuint32) : disjoint_set α h :=
⟨mk_hashmap h⟩
namespace disjoint_set
variables [decidable_eq α] {h : αnat}
variables [decidable_eq α] {h : αuint32}
private def find_aux : nat → α → hashmap α (node α) h → node α
| 0 a m := { find := a, rank := 0 }