doc: binary search trees

This commit is contained in:
Leonardo de Moura 2022-04-01 19:30:28 -07:00
parent 8636594dac
commit ed935ed7a7
2 changed files with 225 additions and 0 deletions

View file

@ -2,6 +2,7 @@ Examples
========
- [Palindromes](examples/palindromes.lean.html)
- [Binary Search Trees](examples/bintree.lean.html)
- [A Certified Type Checker](examples/tc.lean.html)
- [The Well-Typed Interpreter](examples/interp.lean.html)
- [Dependent de Bruijn Indices](examples/deBruijn.lean.html)

224
doc/examples/bintree.lean Normal file
View file

@ -0,0 +1,224 @@
/-|
==========================================
Binary Search Trees
==========================================
If the type of keys can be totally ordered -- that is, it supports a well-behaved `≤` comparison --
then maps can be implemented with binary search trees (BSTs). Insert and lookup operations on BSTs take time
proportional to the height of the tree. If the tree is balanced, the operations therefore take logarithmic time.
This example is based on a similar example found in the ["Sofware Foundations"](https://softwarefoundations.cis.upenn.edu/vfa-current/SearchTree.html)
book (volume 3).
-/
/-|
We use `Nat` as the key type in our implementation of BSTs,
since it has a convenient total order with lots of theorems and automation available.
We leave as an exercise to the reader the generalization to arbitrary types.
-/
inductive Tree (β : Type v) where
| leaf
| node (left : Tree β) (key : Nat) (value : β) (right : Tree β)
deriving Repr
/-|
The function `contains` returns `true` iff the given gree contains the key `k`.
-/
def Tree.contains (t : Tree β) (k : Nat) : Bool :=
match t with
| leaf => false
| node left key value right =>
if k < key then
left.contains k
else if key < k then
right.contains k
else
true
/-|
`t.find? k` returns `some v` if `v` is the value bound to key `k` in the tree `t`. It returns `none` otherwise.
-/
def Tree.find? (t : Tree β) (k : Nat) : Option β :=
match t with
| leaf => none
| node left key value right =>
if k < key then
left.find? k
else if key < k then
right.find? k
else
some value
/-|
`t.insert k v` is the map containing all the bindings of `t` along with a binding of `k` to `v`.
-/
def Tree.insert (t : Tree β) (k : Nat) (v : β) : Tree β :=
match t with
| leaf => node leaf k v leaf
| node left key value right =>
if k < key then
node (left.insert k v) key value right
else if key < k then
node left key value (right.insert k v)
else
node left k v right
/-|
Let's add a new operation to our tree: converting it to an association list that contains the key--value bindings from the tree stored as pairs.
If that list is sorted by the keys, then any two trees that represent the same map would be converted to the same list.
Here's a function that does so with an in-order traversal of the tree.
-/
def Tree.toList (t : Tree β) : List (Nat × β) :=
match t with
| leaf => []
| node left key value right => left.toList ++ [(key, value)] ++ right.toList
#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one"
#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one" |>.toList
/-|
The implemention of `Tree.toList` is inefficient because of how it uses the `++` operator.
On a balanced tree its running time is linearithmic, because it does a linear number of
concatentations at each level of the tree. On an unbalanced tree it's quadratic time.
Here's a tail-recursive implementation than runs in linear time, regardless of whether the tree is balanced:
-/
def Tree.toListTR (t : Tree β) : List (Nat × β) :=
go t []
where
go (t : Tree β) (acc : List (Nat × β)) : List (Nat × β) :=
match t with
| leaf => acc
| node left key value right => left.go ((key, value) :: right.go acc)
/-|
We now prove that `t.toList` and `t.toListTR` return the same list.
The proof is on induction, and as we used the auxiliary function `go`
to define `Tree.toListTR`, we use an auxiliary theorem here too.
-/
theorem Tree.toList_eq_toListTR (t : Tree β) : t.toList = t.toListTR := by
simp [toListTR, go t []]
where
go (t : Tree β) (acc : List (Nat × β)) : toListTR.go t acc = t.toList ++ acc := by
induction t generalizing acc <;> simp [toListTR.go, toList, *, List.append_assoc]
/-|
The `[csimp]` annotation instructs the Lean code generator to replace
any `Tree.toList` with `Tree.toListTR` when generating code.
-/
@[csimp] theorem Tree.toList_eq_toListTR_csimp : @Tree.toList = @Tree.toListTR := by
funext β t
apply toList_eq_toListTR
/-|
The implementations of `Tree.find?` and `Tree.insert` assume that values of type tree obey the BST invariant:
for any non-empty node with key `k`, all the values of the `left` subtree are less than `k` and all the values
of the right subtree are greater than `k`. But that invariant is not part of the definition of tree.
So, let's formalize the BST invariant. Here's one way to do so. First, we define a helper `ForallTree`
to express that idea that a predicate holds at every node of a tree:
-/
inductive ForallTree (p : Nat → β → Prop) : Tree β → Prop
| leaf : ForallTree p .leaf
| node : ForallTree p left → p key value → ForallTree p right → ForallTree p (.node left key value right)
/-|
Second, we define the BST invariant:
An empty tree is a BST.
A non-empty tree is a BST if all its left nodes have a lesser key, its right nodes have a greater key, and the left and right subtrees are themselves BSTs.
-/
inductive BST : Tree β → Prop
| leaf : BST .leaf
| node :
{value : β} →
ForallTree (fun k v => k < key) left →
ForallTree (fun k v => key < k) right →
BST left → BST right →
BST (.node left key value right)
/-|
We now prove that `Tree.insert` preserves the BST invariant using induction and case analysis.
-/
theorem Tree.forall_insert_of_forall (h₁ : ForallTree p t) (h₂ : p key value) : ForallTree p (t.insert key value) := by
induction h₁ with
| leaf => exact .node .leaf h₂ .leaf
| node hl hp hr ihl ihr =>
rename Nat => k
simp [insert]
by_cases hlt : key < k <;> simp [hlt]
· exact .node ihl hp hr
· by_cases hgt : k < key <;> simp [hgt]
· exact .node hl hp ihr
· have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
subst key
exact .node hl h₂ hr
theorem Tree.bst_insert_of_bst {t : Tree β} (h : BST t) (key : Nat) (value : β) : BST (t.insert key value) := by
induction h with
| leaf => exact .node .leaf .leaf .leaf .leaf
| node h₁ h₂ b₁ b₂ ih₁ ih₂ =>
rename Nat => k
simp [insert]
by_cases hlt : key < k <;> simp [*]
· exact .node (forall_insert_of_forall h₁ hlt) h₂ ih₁ b₂
· by_cases hgt : k < key <;> simp [*]
· exact .node h₁ (forall_insert_of_forall h₂ hgt) b₁ ih₂
· have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
subst key
exact .node h₁ h₂ b₁ b₂
/-|
Now, we define the type `BinTree` using a `Subtype` that states that only trees satisfying the BST invariant are `BinTree`s.
-/
def BinTree (β : Type u) := { t : Tree β // BST t }
def BinTree.mk : BinTree β :=
⟨.leaf, .leaf⟩
def BinTree.contains (b : BinTree β) (k : Nat) : Bool :=
b.val.contains k
def BinTree.find? (b : BinTree β) (k : Nat) : Option β :=
b.val.find? k
def BinTree.insert (b : BinTree β) (k : Nat) (v : β) : BinTree β :=
⟨b.val.insert k v, b.val.bst_insert_of_bst b.property k v⟩
attribute [local simp] BinTree.mk BinTree.contains BinTree.find? BinTree.insert Tree.find? Tree.contains Tree.insert
/-|
Finally, we prove that `BinTree.find?` and `BinTree.insert` satisfy the map properties.
-/
theorem BinTree.find_mk {β : Type u} (k : Nat) : (@BinTree.mk β).find? k = none := by
simp
theorem BinTree.find_insert (b : BinTree β) (k : Nat) (v : β) : (b.insert k v).find? k = some v := by
let ⟨t, h⟩ := b; simp
induction t with simp
| node left key value right ihl ihr =>
by_cases hlt : k < key <;> simp [*]
· cases h; apply ihl; assumption
· by_cases hgt : key < k <;> simp [*]
cases h; apply ihr; assumption
theorem BinTree.find_insert_of_ne (b : BinTree β) (h : k ≠ k') (v : β) : (b.insert k v).find? k' = b.find? k' := by
let ⟨t, h⟩ := b; simp
induction t with simp
| leaf =>
split <;> simp <;> split <;> simp
have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
contradiction
| node left key value right ihl ihr =>
let .node hl hr bl br := h
have ihl := ihl bl
have ihr := ihr br
by_cases hlt : k < key <;> simp [*]
by_cases hlt : key < k <;> simp [*]
have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
subst key
by_cases hlt : k' < k <;> simp [*]
by_cases hlt : k < k' <;> simp [*]
have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
contradiction