doc: binary search trees
This commit is contained in:
parent
8636594dac
commit
ed935ed7a7
2 changed files with 225 additions and 0 deletions
|
|
@ -2,6 +2,7 @@ Examples
|
|||
========
|
||||
|
||||
- [Palindromes](examples/palindromes.lean.html)
|
||||
- [Binary Search Trees](examples/bintree.lean.html)
|
||||
- [A Certified Type Checker](examples/tc.lean.html)
|
||||
- [The Well-Typed Interpreter](examples/interp.lean.html)
|
||||
- [Dependent de Bruijn Indices](examples/deBruijn.lean.html)
|
||||
|
|
|
|||
224
doc/examples/bintree.lean
Normal file
224
doc/examples/bintree.lean
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
/-|
|
||||
==========================================
|
||||
Binary Search Trees
|
||||
==========================================
|
||||
|
||||
If the type of keys can be totally ordered -- that is, it supports a well-behaved `≤` comparison --
|
||||
then maps can be implemented with binary search trees (BSTs). Insert and lookup operations on BSTs take time
|
||||
proportional to the height of the tree. If the tree is balanced, the operations therefore take logarithmic time.
|
||||
|
||||
This example is based on a similar example found in the ["Sofware Foundations"](https://softwarefoundations.cis.upenn.edu/vfa-current/SearchTree.html)
|
||||
book (volume 3).
|
||||
-/
|
||||
|
||||
/-|
|
||||
We use `Nat` as the key type in our implementation of BSTs,
|
||||
since it has a convenient total order with lots of theorems and automation available.
|
||||
We leave as an exercise to the reader the generalization to arbitrary types.
|
||||
-/
|
||||
|
||||
inductive Tree (β : Type v) where
|
||||
| leaf
|
||||
| node (left : Tree β) (key : Nat) (value : β) (right : Tree β)
|
||||
deriving Repr
|
||||
|
||||
/-|
|
||||
The function `contains` returns `true` iff the given gree contains the key `k`.
|
||||
-/
|
||||
def Tree.contains (t : Tree β) (k : Nat) : Bool :=
|
||||
match t with
|
||||
| leaf => false
|
||||
| node left key value right =>
|
||||
if k < key then
|
||||
left.contains k
|
||||
else if key < k then
|
||||
right.contains k
|
||||
else
|
||||
true
|
||||
|
||||
/-|
|
||||
`t.find? k` returns `some v` if `v` is the value bound to key `k` in the tree `t`. It returns `none` otherwise.
|
||||
-/
|
||||
def Tree.find? (t : Tree β) (k : Nat) : Option β :=
|
||||
match t with
|
||||
| leaf => none
|
||||
| node left key value right =>
|
||||
if k < key then
|
||||
left.find? k
|
||||
else if key < k then
|
||||
right.find? k
|
||||
else
|
||||
some value
|
||||
|
||||
/-|
|
||||
`t.insert k v` is the map containing all the bindings of `t` along with a binding of `k` to `v`.
|
||||
-/
|
||||
def Tree.insert (t : Tree β) (k : Nat) (v : β) : Tree β :=
|
||||
match t with
|
||||
| leaf => node leaf k v leaf
|
||||
| node left key value right =>
|
||||
if k < key then
|
||||
node (left.insert k v) key value right
|
||||
else if key < k then
|
||||
node left key value (right.insert k v)
|
||||
else
|
||||
node left k v right
|
||||
/-|
|
||||
Let's add a new operation to our tree: converting it to an association list that contains the key--value bindings from the tree stored as pairs.
|
||||
If that list is sorted by the keys, then any two trees that represent the same map would be converted to the same list.
|
||||
Here's a function that does so with an in-order traversal of the tree.
|
||||
-/
|
||||
def Tree.toList (t : Tree β) : List (Nat × β) :=
|
||||
match t with
|
||||
| leaf => []
|
||||
| node left key value right => left.toList ++ [(key, value)] ++ right.toList
|
||||
|
||||
#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one"
|
||||
#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one" |>.toList
|
||||
|
||||
/-|
|
||||
The implemention of `Tree.toList` is inefficient because of how it uses the `++` operator.
|
||||
On a balanced tree its running time is linearithmic, because it does a linear number of
|
||||
concatentations at each level of the tree. On an unbalanced tree it's quadratic time.
|
||||
Here's a tail-recursive implementation than runs in linear time, regardless of whether the tree is balanced:
|
||||
-/
|
||||
def Tree.toListTR (t : Tree β) : List (Nat × β) :=
|
||||
go t []
|
||||
where
|
||||
go (t : Tree β) (acc : List (Nat × β)) : List (Nat × β) :=
|
||||
match t with
|
||||
| leaf => acc
|
||||
| node left key value right => left.go ((key, value) :: right.go acc)
|
||||
|
||||
/-|
|
||||
We now prove that `t.toList` and `t.toListTR` return the same list.
|
||||
The proof is on induction, and as we used the auxiliary function `go`
|
||||
to define `Tree.toListTR`, we use an auxiliary theorem here too.
|
||||
-/
|
||||
theorem Tree.toList_eq_toListTR (t : Tree β) : t.toList = t.toListTR := by
|
||||
simp [toListTR, go t []]
|
||||
where
|
||||
go (t : Tree β) (acc : List (Nat × β)) : toListTR.go t acc = t.toList ++ acc := by
|
||||
induction t generalizing acc <;> simp [toListTR.go, toList, *, List.append_assoc]
|
||||
|
||||
/-|
|
||||
The `[csimp]` annotation instructs the Lean code generator to replace
|
||||
any `Tree.toList` with `Tree.toListTR` when generating code.
|
||||
-/
|
||||
@[csimp] theorem Tree.toList_eq_toListTR_csimp : @Tree.toList = @Tree.toListTR := by
|
||||
funext β t
|
||||
apply toList_eq_toListTR
|
||||
|
||||
/-|
|
||||
The implementations of `Tree.find?` and `Tree.insert` assume that values of type tree obey the BST invariant:
|
||||
for any non-empty node with key `k`, all the values of the `left` subtree are less than `k` and all the values
|
||||
of the right subtree are greater than `k`. But that invariant is not part of the definition of tree.
|
||||
|
||||
So, let's formalize the BST invariant. Here's one way to do so. First, we define a helper `ForallTree`
|
||||
to express that idea that a predicate holds at every node of a tree:
|
||||
-/
|
||||
inductive ForallTree (p : Nat → β → Prop) : Tree β → Prop
|
||||
| leaf : ForallTree p .leaf
|
||||
| node : ForallTree p left → p key value → ForallTree p right → ForallTree p (.node left key value right)
|
||||
|
||||
|
||||
/-|
|
||||
Second, we define the BST invariant:
|
||||
An empty tree is a BST.
|
||||
A non-empty tree is a BST if all its left nodes have a lesser key, its right nodes have a greater key, and the left and right subtrees are themselves BSTs.
|
||||
-/
|
||||
inductive BST : Tree β → Prop
|
||||
| leaf : BST .leaf
|
||||
| node :
|
||||
{value : β} →
|
||||
ForallTree (fun k v => k < key) left →
|
||||
ForallTree (fun k v => key < k) right →
|
||||
BST left → BST right →
|
||||
BST (.node left key value right)
|
||||
|
||||
/-|
|
||||
We now prove that `Tree.insert` preserves the BST invariant using induction and case analysis.
|
||||
-/
|
||||
|
||||
theorem Tree.forall_insert_of_forall (h₁ : ForallTree p t) (h₂ : p key value) : ForallTree p (t.insert key value) := by
|
||||
induction h₁ with
|
||||
| leaf => exact .node .leaf h₂ .leaf
|
||||
| node hl hp hr ihl ihr =>
|
||||
rename Nat => k
|
||||
simp [insert]
|
||||
by_cases hlt : key < k <;> simp [hlt]
|
||||
· exact .node ihl hp hr
|
||||
· by_cases hgt : k < key <;> simp [hgt]
|
||||
· exact .node hl hp ihr
|
||||
· have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
|
||||
subst key
|
||||
exact .node hl h₂ hr
|
||||
|
||||
theorem Tree.bst_insert_of_bst {t : Tree β} (h : BST t) (key : Nat) (value : β) : BST (t.insert key value) := by
|
||||
induction h with
|
||||
| leaf => exact .node .leaf .leaf .leaf .leaf
|
||||
| node h₁ h₂ b₁ b₂ ih₁ ih₂ =>
|
||||
rename Nat => k
|
||||
simp [insert]
|
||||
by_cases hlt : key < k <;> simp [*]
|
||||
· exact .node (forall_insert_of_forall h₁ hlt) h₂ ih₁ b₂
|
||||
· by_cases hgt : k < key <;> simp [*]
|
||||
· exact .node h₁ (forall_insert_of_forall h₂ hgt) b₁ ih₂
|
||||
· have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
|
||||
subst key
|
||||
exact .node h₁ h₂ b₁ b₂
|
||||
|
||||
/-|
|
||||
Now, we define the type `BinTree` using a `Subtype` that states that only trees satisfying the BST invariant are `BinTree`s.
|
||||
-/
|
||||
def BinTree (β : Type u) := { t : Tree β // BST t }
|
||||
|
||||
def BinTree.mk : BinTree β :=
|
||||
⟨.leaf, .leaf⟩
|
||||
|
||||
def BinTree.contains (b : BinTree β) (k : Nat) : Bool :=
|
||||
b.val.contains k
|
||||
|
||||
def BinTree.find? (b : BinTree β) (k : Nat) : Option β :=
|
||||
b.val.find? k
|
||||
|
||||
def BinTree.insert (b : BinTree β) (k : Nat) (v : β) : BinTree β :=
|
||||
⟨b.val.insert k v, b.val.bst_insert_of_bst b.property k v⟩
|
||||
|
||||
attribute [local simp] BinTree.mk BinTree.contains BinTree.find? BinTree.insert Tree.find? Tree.contains Tree.insert
|
||||
|
||||
/-|
|
||||
Finally, we prove that `BinTree.find?` and `BinTree.insert` satisfy the map properties.
|
||||
-/
|
||||
|
||||
theorem BinTree.find_mk {β : Type u} (k : Nat) : (@BinTree.mk β).find? k = none := by
|
||||
simp
|
||||
|
||||
theorem BinTree.find_insert (b : BinTree β) (k : Nat) (v : β) : (b.insert k v).find? k = some v := by
|
||||
let ⟨t, h⟩ := b; simp
|
||||
induction t with simp
|
||||
| node left key value right ihl ihr =>
|
||||
by_cases hlt : k < key <;> simp [*]
|
||||
· cases h; apply ihl; assumption
|
||||
· by_cases hgt : key < k <;> simp [*]
|
||||
cases h; apply ihr; assumption
|
||||
|
||||
theorem BinTree.find_insert_of_ne (b : BinTree β) (h : k ≠ k') (v : β) : (b.insert k v).find? k' = b.find? k' := by
|
||||
let ⟨t, h⟩ := b; simp
|
||||
induction t with simp
|
||||
| leaf =>
|
||||
split <;> simp <;> split <;> simp
|
||||
have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
|
||||
contradiction
|
||||
| node left key value right ihl ihr =>
|
||||
let .node hl hr bl br := h
|
||||
have ihl := ihl bl
|
||||
have ihr := ihr br
|
||||
by_cases hlt : k < key <;> simp [*]
|
||||
by_cases hlt : key < k <;> simp [*]
|
||||
have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
|
||||
subst key
|
||||
by_cases hlt : k' < k <;> simp [*]
|
||||
by_cases hlt : k < k' <;> simp [*]
|
||||
have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
|
||||
contradiction
|
||||
Loading…
Add table
Reference in a new issue