feat(library/init/data/hashmap): hash function produces an uint32 instead of nat
Most efficient hash functions use uint32/uint64 and produce values that do not fit in out small nat representation. Thus, GMP big numbers would have to be created.
This commit is contained in:
parent
7aeae54522
commit
af4f831a9f
7 changed files with 69 additions and 52 deletions
|
|
@ -42,6 +42,7 @@ reserve infixl ` - `:65
|
|||
reserve infixl ` * `:70
|
||||
reserve infixl ` / `:70
|
||||
reserve infixl ` % `:70
|
||||
reserve infixl ` %ₙ `:70
|
||||
reserve prefix `-`:100
|
||||
reserve infixr ` ^ `:80
|
||||
|
||||
|
|
@ -324,6 +325,7 @@ class has_sub (α : Type u) := (sub : α → α → α)
|
|||
class has_div (α : Type u) := (div : α → α → α)
|
||||
class has_dvd (α : Type u) := (dvd : α → α → Prop)
|
||||
class has_mod (α : Type u) := (mod : α → α → α)
|
||||
class has_modn (α : Type u) := (modn : α → nat → α)
|
||||
class has_le (α : Type u) := (le : α → α → Prop)
|
||||
class has_lt (α : Type u) := (lt : α → α → Prop)
|
||||
class has_append (α : Type u) := (append : α → α → α)
|
||||
|
|
@ -359,6 +361,7 @@ infix - := has_sub.sub
|
|||
infix / := has_div.div
|
||||
infix ∣ := has_dvd.dvd
|
||||
infix % := has_mod.mod
|
||||
infix %ₙ := has_modn.modn
|
||||
prefix - := has_neg.neg
|
||||
infix <= := has_le.le
|
||||
infix ≤ := has_le.le
|
||||
|
|
|
|||
|
|
@ -56,11 +56,19 @@ def write' (a : array α) (i : nat) (v : α) : array α :=
|
|||
if h : i < a.sz then a.write ⟨i, h⟩ v else a
|
||||
|
||||
/- TODO: add builtin -/
|
||||
def uread [inhabited α] (a : array α) (i : usize) : α :=
|
||||
def uread (a : array α) (i : uint32) (h : i.val < a.sz) : α :=
|
||||
a.read ⟨i.val, h⟩
|
||||
|
||||
/- TODO: add builtin -/
|
||||
def uwrite (a : array α) (i : uint32) (v : α) (h : i.val < a.sz) : array α :=
|
||||
a.write ⟨i.val, h⟩ v
|
||||
|
||||
/- TODO: add builtin -/
|
||||
def uread' [inhabited α] (a : array α) (i : uint32) : α :=
|
||||
if h : i.val < a.sz then a.read ⟨i.val, h⟩ else default α
|
||||
|
||||
/- TODO: add builtin -/
|
||||
def uwrite (a : array α) (i : usize) (v : α) : array α :=
|
||||
def uwrite' (a : array α) (i : uint32) (v : α) : array α :=
|
||||
if h : i.val < a.sz then a.write ⟨i.val, h⟩ v else a
|
||||
|
||||
/- TODO: mark as builtin -/
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ protected def sub : fin n → fin n → fin n
|
|||
| ⟨a, h⟩ ⟨b, _⟩ := ⟨(a + (n - b)) % n, mlt h⟩
|
||||
|
||||
/-
|
||||
Remark: sub/mod/div can be defined without using (% n), but
|
||||
Remark: mod/div/modn can be defined without using (% n), but
|
||||
we are trying to minimize the number of nat theorems
|
||||
needed to boostrap Lean.
|
||||
-/
|
||||
|
|
@ -62,6 +62,9 @@ protected def mod : fin n → fin n → fin n
|
|||
protected def div : fin n → fin n → fin n
|
||||
| ⟨a, h⟩ ⟨b, _⟩ := ⟨(a / b) % n, mlt h⟩
|
||||
|
||||
protected def modn : fin n → nat → fin n
|
||||
| ⟨a, h⟩ m := ⟨(a % m) % n, mlt h⟩
|
||||
|
||||
instance : has_zero (fin (succ n)) := ⟨⟨0, succ_pos n⟩⟩
|
||||
instance : has_one (fin (succ n)) := ⟨of_nat 1⟩
|
||||
instance : has_add (fin n) := ⟨fin.add⟩
|
||||
|
|
@ -69,6 +72,7 @@ instance : has_sub (fin n) := ⟨fin.sub⟩
|
|||
instance : has_mul (fin n) := ⟨fin.mul⟩
|
||||
instance : has_mod (fin n) := ⟨fin.mod⟩
|
||||
instance : has_div (fin n) := ⟨fin.div⟩
|
||||
instance : has_modn (fin n) := ⟨fin.modn⟩
|
||||
|
||||
theorem eq_of_veq : ∀ {i j : fin n}, (val i) = (val j) → i = j
|
||||
| ⟨iv, ilt₁⟩ ⟨.(iv), ilt₂⟩ rfl := rfl
|
||||
|
|
@ -82,6 +86,9 @@ theorem ne_of_vne {i j : fin n} (h : val i ≠ val j) : i ≠ j :=
|
|||
theorem vne_of_ne {i j : fin n} (h : i ≠ j) : val i ≠ val j :=
|
||||
λ h', absurd (eq_of_veq h') h
|
||||
|
||||
theorem modn_lt : ∀ {m : nat} (i : fin n), m > 0 → (i %ₙ m).val < m
|
||||
| m ⟨a, h⟩ hp := nat.lt_of_le_of_lt (mod_le _ _) (mod_lt _ hp)
|
||||
|
||||
end fin
|
||||
|
||||
open fin
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@ universes u v w
|
|||
def bucket_array (α : Type u) (β : α → Type v) :=
|
||||
{ b : array (list (Σ a, β a)) // b.sz > 0 }
|
||||
|
||||
def bucket_array.write {α : Type u} {β : α → Type v} (data : bucket_array α β) (i : fin data.val.sz) (d : list (Σ a, β a)) : bucket_array α β :=
|
||||
⟨ data.val.write i d,
|
||||
calc (data.val.write i d).sz = data.val.sz : array.sz_write_eq _ _ _
|
||||
... > 0 : data.property ⟩
|
||||
def bucket_array.uwrite {α : Type u} {β : α → Type v} (data : bucket_array α β) (i : uint32) (d : list (Σ a, β a)) (h : i.val < data.val.sz) : bucket_array α β :=
|
||||
⟨ data.val.uwrite i d h,
|
||||
calc (data.val.uwrite i d h).sz = data.val.sz : array.sz_write_eq _ _ _
|
||||
... > 0 : data.property ⟩
|
||||
|
||||
structure hashmap_imp (α : Type u) (β : α → Type v) :=
|
||||
(size : nat)
|
||||
|
|
@ -34,12 +34,12 @@ let n := if nbuckets = 0 then 8 else nbuckets in
|
|||
namespace hashmap_imp
|
||||
variables {α : Type u} {β : α → Type v}
|
||||
|
||||
def mk_idx {n : nat} (h : n > 0) (i : nat) : fin n :=
|
||||
⟨i % n, nat.mod_lt _ h⟩
|
||||
def mk_idx {n : nat} (h : n > 0) (u : uint32) : { u : uint32 // u.val < n } :=
|
||||
⟨u %ₙ n, fin.modn_lt _ h⟩
|
||||
|
||||
def reinsert_aux (hash_fn : α → nat) (data : bucket_array α β) (a : α) (b : β a) : bucket_array α β :=
|
||||
let bidx := mk_idx data.property (hash_fn a) in
|
||||
data.write bidx (⟨a, b⟩ :: data.val.read bidx)
|
||||
def reinsert_aux (hash_fn : α → uint32) (data : bucket_array α β) (a : α) (b : β a) : bucket_array α β :=
|
||||
let ⟨i, h⟩ := mk_idx data.property (hash_fn a) in
|
||||
data.uwrite i (⟨a, b⟩ :: data.val.uread i h) h
|
||||
|
||||
def fold_buckets {δ : Type w} (data : bucket_array α β) (d : δ) (f : δ → Π a, β a → δ) : δ :=
|
||||
data.val.foldl d (λ b d, b.foldl (λ r (p : Σ a, β a), f r p.1 p.2) d)
|
||||
|
|
@ -52,10 +52,11 @@ def find_aux [decidable_eq α] (a : α) : list (Σ a, β a) → option (β a)
|
|||
def contains_aux [decidable_eq α] (a : α) (l : list (Σ a, β a)) : bool :=
|
||||
(find_aux a l).is_some
|
||||
|
||||
def find [decidable_eq α] (hash_fn : α → nat) (m : hashmap_imp α β) (a : α) : option (β a) :=
|
||||
def find [decidable_eq α] (hash_fn : α → uint32) (m : hashmap_imp α β) (a : α) : option (β a) :=
|
||||
match m with
|
||||
| ⟨_, buckets, nz⟩ :=
|
||||
find_aux a (buckets.read (mk_idx nz (hash_fn a)))
|
||||
| ⟨_, buckets⟩ :=
|
||||
let ⟨i, h⟩ := mk_idx buckets.property (hash_fn a) in
|
||||
find_aux a (buckets.val.uread i h)
|
||||
|
||||
def fold {δ : Type w} (m : hashmap_imp α β) (d : δ) (f : δ → Π a, β a → δ) : δ :=
|
||||
fold_buckets m.buckets d f
|
||||
|
|
@ -68,15 +69,15 @@ def erase_aux [decidable_eq α] (a : α) : list (Σ a, β a) → list (Σ a, β
|
|||
| [] := []
|
||||
| (⟨a', b'⟩::t) := if a' = a then t else ⟨a', b'⟩ :: erase_aux t
|
||||
|
||||
def insert [decidable_eq α] (hash_fn : α → nat) (m : hashmap_imp α β) (a : α) (b : β a) : hashmap_imp α β :=
|
||||
def insert [decidable_eq α] (hash_fn : α → uint32) (m : hashmap_imp α β) (a : α) (b : β a) : hashmap_imp α β :=
|
||||
match m with
|
||||
| ⟨size, buckets⟩ :=
|
||||
let bidx := mk_idx buckets.property (hash_fn a) in
|
||||
let bkt := buckets.val.read bidx in
|
||||
let ⟨i, h⟩ := mk_idx buckets.property (hash_fn a) in
|
||||
let bkt := buckets.val.uread i h in
|
||||
if contains_aux a bkt
|
||||
then ⟨size, buckets.write bidx (replace_aux a b bkt)⟩
|
||||
then ⟨size, buckets.uwrite i (replace_aux a b bkt) h⟩
|
||||
else let size' := size + 1 in
|
||||
let buckets' := buckets.write bidx (⟨a, b⟩::bkt) in
|
||||
let buckets' := buckets.uwrite i (⟨a, b⟩::bkt) h in
|
||||
if size' <= buckets.val.sz
|
||||
then ⟨size', buckets'⟩
|
||||
else let nbuckets' := buckets.val.sz * 2 in
|
||||
|
|
@ -84,32 +85,31 @@ match m with
|
|||
⟨ size',
|
||||
fold_buckets buckets' ⟨mk_array nbuckets' [], nz'⟩ (reinsert_aux hash_fn) ⟩
|
||||
|
||||
def erase [decidable_eq α] (hash_fn : α → nat) (m : hashmap_imp α β) (a : α) : hashmap_imp α β :=
|
||||
def erase [decidable_eq α] (hash_fn : α → uint32) (m : hashmap_imp α β) (a : α) : hashmap_imp α β :=
|
||||
match m with
|
||||
| ⟨ size, buckets ⟩ :=
|
||||
let bidx := mk_idx buckets.property (hash_fn a) in
|
||||
let bkt := buckets.val.read bidx in
|
||||
if contains_aux a bkt
|
||||
then ⟨size - 1, buckets.write bidx $ erase_aux a bkt⟩
|
||||
let ⟨i, h⟩ := mk_idx buckets.property (hash_fn a) in
|
||||
let bkt := buckets.val.uread i h in
|
||||
if contains_aux a bkt then ⟨size - 1, buckets.uwrite i (erase_aux a bkt) h⟩
|
||||
else m
|
||||
|
||||
inductive well_formed [decidable_eq α] (hash_fn : α → nat) : hashmap_imp α β → Prop
|
||||
inductive well_formed [decidable_eq α] (hash_fn : α → uint32) : hashmap_imp α β → Prop
|
||||
| mk_wff : ∀ n, well_formed (mk_hashmap_imp n)
|
||||
| insert_wff : ∀ m a b, well_formed m → well_formed (insert hash_fn m a b)
|
||||
| erase_wff : ∀ m a, well_formed m → well_formed (erase hash_fn m a)
|
||||
|
||||
end hashmap_imp
|
||||
|
||||
def d_hashmap (α : Type u) (β : α → Type v) [decidable_eq α] (h : α → nat) :=
|
||||
def d_hashmap (α : Type u) (β : α → Type v) [decidable_eq α] (h : α → uint32) :=
|
||||
{ m : hashmap_imp α β // m.well_formed h }
|
||||
|
||||
open hashmap_imp
|
||||
|
||||
def mk_d_hashmap {α : Type u} {β : α → Type v} [decidable_eq α] (h : α → nat) (nbuckets := 8) : d_hashmap α β h :=
|
||||
def mk_d_hashmap {α : Type u} {β : α → Type v} [decidable_eq α] (h : α → uint32) (nbuckets := 8) : d_hashmap α β h :=
|
||||
⟨ mk_hashmap_imp nbuckets, well_formed.mk_wff h nbuckets ⟩
|
||||
|
||||
namespace d_hashmap
|
||||
variables {α : Type u} {β : α → Type v} [decidable_eq α] {h : α → nat}
|
||||
variables {α : Type u} {β : α → Type v} [decidable_eq α] {h : α → uint32}
|
||||
|
||||
def insert (m : d_hashmap α β h) (a : α) (b : β a) : d_hashmap α β h :=
|
||||
match m with
|
||||
|
|
@ -144,14 +144,14 @@ m.size = 0
|
|||
|
||||
end d_hashmap
|
||||
|
||||
def hashmap (α : Type u) (β : Type v) [decidable_eq α] (h : α → nat) :=
|
||||
def hashmap (α : Type u) (β : Type v) [decidable_eq α] (h : α → uint32) :=
|
||||
d_hashmap α (λ _, β) h
|
||||
|
||||
def mk_hashmap {α : Type u} {β : Type v} [decidable_eq α] (h : α → nat) (nbuckets := 8) : hashmap α β h :=
|
||||
def mk_hashmap {α : Type u} {β : Type v} [decidable_eq α] (h : α → uint32) (nbuckets := 8) : hashmap α β h :=
|
||||
mk_d_hashmap h nbuckets
|
||||
|
||||
namespace hashmap
|
||||
variables {α : Type u} {β : Type v} [decidable_eq α] {h : α → nat}
|
||||
variables {α : Type u} {β : Type v} [decidable_eq α] {h : α → uint32}
|
||||
|
||||
@[inline] def insert (m : hashmap α β h) (a : α) (b : β) : hashmap α β h :=
|
||||
d_hashmap.insert m a b
|
||||
|
|
|
|||
|
|
@ -93,4 +93,11 @@ mod.induction_on x y
|
|||
have heq : x % y = x, from mod_eq_of_lt hgt,
|
||||
heq.symm ▸ hgt))
|
||||
|
||||
theorem mod_le (x y : ℕ) : x % y ≤ x :=
|
||||
or.elim (nat.lt_or_ge x y)
|
||||
(λ h₁ : x < y, (mod_eq_of_lt h₁).symm ▸ nat.le_refl _)
|
||||
(λ h₁ : x ≥ y, or.elim (eq_zero_or_pos y)
|
||||
(λ h₂ : y = 0, h₂.symm ▸ (nat.mod_zero x).symm ▸ nat.le_refl _)
|
||||
(λ h₂ : y > 0, nat.le_trans (nat.le_of_lt (nat.mod_lt _ h₂)) h₁))
|
||||
|
||||
end nat
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ instance : has_add uint16 := ⟨fin.add⟩
|
|||
instance : has_sub uint16 := ⟨fin.sub⟩
|
||||
instance : has_mul uint16 := ⟨fin.mul⟩
|
||||
instance : has_mod uint16 := ⟨fin.mod⟩
|
||||
instance : has_modn uint16 := ⟨fin.modn⟩
|
||||
instance : has_div uint16 := ⟨fin.div⟩
|
||||
instance : has_lt uint16 := ⟨fin.lt⟩
|
||||
instance : has_le uint16 := ⟨fin.le⟩
|
||||
|
|
@ -41,6 +42,7 @@ instance : has_add uint32 := ⟨fin.add⟩
|
|||
instance : has_sub uint32 := ⟨fin.sub⟩
|
||||
instance : has_mul uint32 := ⟨fin.mul⟩
|
||||
instance : has_mod uint32 := ⟨fin.mod⟩
|
||||
instance : has_modn uint32 := ⟨fin.modn⟩
|
||||
instance : has_div uint32 := ⟨fin.div⟩
|
||||
instance : has_lt uint32 := ⟨fin.lt⟩
|
||||
instance : has_le uint32 := ⟨fin.le⟩
|
||||
|
|
@ -51,24 +53,14 @@ instance : has_add uint64 := ⟨fin.add⟩
|
|||
instance : has_sub uint64 := ⟨fin.sub⟩
|
||||
instance : has_mul uint64 := ⟨fin.mul⟩
|
||||
instance : has_mod uint64 := ⟨fin.mod⟩
|
||||
instance : has_modn uint64 := ⟨fin.modn⟩
|
||||
instance : has_div uint64 := ⟨fin.div⟩
|
||||
instance : has_lt uint64 := ⟨fin.lt⟩
|
||||
instance : has_le uint64 := ⟨fin.le⟩
|
||||
|
||||
def uint16.of_nat (n : nat) : uint16 :=
|
||||
fin.of_nat n
|
||||
|
||||
def uint16.to_nat (u : uint16) : nat :=
|
||||
fin.val u
|
||||
|
||||
def uint32.of_nat (n : nat) : uint32 :=
|
||||
fin.of_nat n
|
||||
|
||||
def uint32.to_nat (u : uint32) : nat :=
|
||||
fin.val u
|
||||
|
||||
def uint64.of_nat (n : nat) : uint64 :=
|
||||
fin.of_nat n
|
||||
|
||||
def uint64.to_nat (u : uint64) : nat :=
|
||||
fin.val u
|
||||
def uint16.of_nat (n : nat) : uint16 := fin.of_nat n
|
||||
def uint16.to_nat (u : uint16) : nat := fin.val u
|
||||
def uint32.of_nat (n : nat) : uint32 := fin.of_nat n
|
||||
def uint32.to_nat (u : uint32) : nat := fin.val u
|
||||
def uint64.of_nat (n : nat) : uint64 := fin.of_nat n
|
||||
def uint64.to_nat (u : uint64) : nat := fin.val u
|
||||
|
|
|
|||
|
|
@ -16,16 +16,16 @@ structure disjoint_set.node (α : Type u) :=
|
|||
(find : α)
|
||||
(rank : nat := 0)
|
||||
|
||||
structure disjoint_set (α : Type u) [decidable_eq α] (h : α → nat) : Type u :=
|
||||
structure disjoint_set (α : Type u) [decidable_eq α] (h : α → uint32) : Type u :=
|
||||
(map : hashmap α (disjoint_set.node α) h)
|
||||
|
||||
variables {α : Type u}
|
||||
|
||||
def mk_disjoint_set [decidable_eq α] (h : α → nat) : disjoint_set α h :=
|
||||
def mk_disjoint_set [decidable_eq α] (h : α → uint32) : disjoint_set α h :=
|
||||
⟨mk_hashmap h⟩
|
||||
|
||||
namespace disjoint_set
|
||||
variables [decidable_eq α] {h : α → nat}
|
||||
variables [decidable_eq α] {h : α → uint32}
|
||||
|
||||
private def find_aux : nat → α → hashmap α (node α) h → node α
|
||||
| 0 a m := { find := a, rank := 0 }
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue