doc: binary search trees

2022-04-01 19:30:28 -07:00 · 2022-04-01 19:30:28 -07:00 · ed935ed7a7
commit ed935ed7a7
parent 8636594dac
2 changed files with 225 additions and 0 deletions
--- a/doc/examples.md
+++ b/doc/examples.md
@ -2,6 +2,7 @@ Examples
 ========

 - [Palindromes](examples/palindromes.lean.html)
+- [Binary Search Trees](examples/bintree.lean.html)
 - [A Certified Type Checker](examples/tc.lean.html)
 - [The Well-Typed Interpreter](examples/interp.lean.html)
 - [Dependent de Bruijn Indices](examples/deBruijn.lean.html)
--- a/doc/examples/bintree.lean
+++ b/doc/examples/bintree.lean
@ -0,0 +1,224 @@
+/-|
+==========================================
+Binary Search Trees
+==========================================
+
+If the type of keys can be totally ordered -- that is, it supports a well-behaved `≤` comparison --
+then maps can be implemented with binary search trees (BSTs). Insert and lookup operations on BSTs take time
+proportional to the height of the tree. If the tree is balanced, the operations therefore take logarithmic time.
+
+This example is based on a similar example found in the ["Sofware Foundations"](https://softwarefoundations.cis.upenn.edu/vfa-current/SearchTree.html)
+book (volume 3).
+-/
+
+/-|
+We use `Nat` as the key type in our implementation of BSTs,
+since it has a convenient total order with lots of theorems and automation available.
+We leave as an exercise to the reader the generalization to arbitrary types.
+-/
+
+inductive Tree (β : Type v) where
+  | leaf
+  | node (left : Tree β) (key : Nat) (value : β) (right : Tree β)
+  deriving Repr
+
+/-|
+The function `contains` returns `true` iff the given gree contains the key `k`.
+-/
+def Tree.contains (t : Tree β) (k : Nat) : Bool :=
+  match t with
+  | leaf => false
+  | node left key value right =>
+    if k < key then
+      left.contains k
+    else if key < k then
+      right.contains k
+    else
+      true
+
+/-|
+`t.find? k` returns `some v` if `v` is the value bound to key `k` in the tree `t`. It returns `none` otherwise.
+-/
+def Tree.find? (t : Tree β) (k : Nat) : Option β :=
+  match t with
+  | leaf => none
+  | node left key value right =>
+    if k < key then
+      left.find? k
+    else if key < k then
+      right.find? k
+    else
+      some value
+
+/-|
+`t.insert k v` is the map containing all the bindings of `t` along with a binding of `k` to `v`.
+-/
+def Tree.insert (t : Tree β) (k : Nat) (v : β) : Tree β :=
+  match t with
+  | leaf => node leaf k v leaf
+  | node left key value right =>
+    if k < key then
+      node (left.insert k v) key value right
+    else if key < k then
+      node left key value (right.insert k v)
+    else
+      node left k v right
+/-|
+Let's add a new operation to our tree: converting it to an association list that contains the key--value bindings from the tree stored as pairs.
+If that list is sorted by the keys, then any two trees that represent the same map would be converted to the same list.
+Here's a function that does so with an in-order traversal of the tree.
+-/
+def Tree.toList (t : Tree β) : List (Nat × β) :=
+  match t with
+  | leaf => []
+  | node left key value right => left.toList ++ [(key, value)] ++ right.toList
+
+#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one"
+#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one" |>.toList
+
+/-|
+The implemention of `Tree.toList` is inefficient because of how it uses the `++` operator.
+On a balanced tree its running time is linearithmic, because it does a linear number of
+concatentations at each level of the tree. On an unbalanced tree it's quadratic time.
+Here's a tail-recursive implementation than runs in linear time, regardless of whether the tree is balanced:
+-/
+def Tree.toListTR (t : Tree β) : List (Nat × β) :=
+  go t []
+where
+  go (t : Tree β) (acc : List (Nat × β)) : List (Nat × β) :=
+    match t with
+    | leaf => acc
+    | node left key value right => left.go ((key, value) :: right.go acc)
+
+/-|
+We now prove that `t.toList` and `t.toListTR` return the same list.
+The proof is on induction, and as we used the auxiliary function `go`
+to define `Tree.toListTR`, we use an auxiliary theorem here too.
+-/
+theorem Tree.toList_eq_toListTR (t : Tree β) : t.toList = t.toListTR := by
+  simp [toListTR, go t []]
+where
+  go (t : Tree β) (acc : List (Nat × β)) : toListTR.go t acc = t.toList ++ acc := by
+    induction t generalizing acc <;> simp [toListTR.go, toList, *, List.append_assoc]
+
+/-|
+The `[csimp]` annotation instructs the Lean code generator to replace
+any `Tree.toList` with `Tree.toListTR` when generating code.
+-/
+@[csimp] theorem Tree.toList_eq_toListTR_csimp : @Tree.toList = @Tree.toListTR := by
+  funext β t
+  apply toList_eq_toListTR
+
+/-|
+The implementations of `Tree.find?` and `Tree.insert` assume that values of type tree obey the BST invariant:
+for any non-empty node with key `k`, all the values of the `left` subtree are less than `k` and all the values
+of the right subtree are greater than `k`. But that invariant is not part of the definition of tree.
+
+So, let's formalize the BST invariant. Here's one way to do so. First, we define a helper `ForallTree`
+to express that idea that a predicate holds at every node of a tree:
+-/
+inductive ForallTree (p : Nat → β → Prop) : Tree β → Prop
+  | leaf : ForallTree p .leaf
+  | node : ForallTree p left → p key value → ForallTree p right → ForallTree p (.node left key value right)
+
+
+/-|
+Second, we define the BST invariant:
+An empty tree is a BST.
+A non-empty tree is a BST if all its left nodes have a lesser key, its right nodes have a greater key, and the left and right subtrees are themselves BSTs.
+-/
+inductive BST : Tree β → Prop
+  | leaf : BST .leaf
+  | node :
+     {value : β} →
+     ForallTree (fun k v => k < key) left →
+     ForallTree (fun k v => key < k) right →
+     BST left → BST right →
+     BST (.node left key value right)
+
+/-|
+We now prove that `Tree.insert` preserves the BST invariant using induction and case analysis.
+-/
+
+theorem Tree.forall_insert_of_forall (h₁ : ForallTree p t) (h₂ : p key value) : ForallTree p (t.insert key value) := by
+  induction h₁ with
+  | leaf => exact .node .leaf h₂ .leaf
+  | node hl hp hr ihl ihr =>
+    rename Nat => k
+    simp [insert]
+    by_cases hlt : key < k <;> simp [hlt]
+    · exact .node ihl hp hr
+    · by_cases hgt : k < key <;> simp [hgt]
+      · exact .node hl hp ihr
+      · have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
+        subst key
+        exact .node hl h₂ hr
+
+theorem Tree.bst_insert_of_bst {t : Tree β} (h : BST t) (key : Nat) (value : β) : BST (t.insert key value) := by
+  induction h with
+  | leaf => exact .node .leaf .leaf .leaf .leaf
+  | node h₁ h₂ b₁ b₂ ih₁ ih₂ =>
+    rename Nat => k
+    simp [insert]
+    by_cases hlt : key < k <;> simp [*]
+    · exact .node (forall_insert_of_forall h₁ hlt) h₂ ih₁ b₂
+    · by_cases hgt : k < key <;> simp [*]
+      · exact .node h₁ (forall_insert_of_forall h₂ hgt) b₁ ih₂
+      · have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
+        subst key
+        exact .node h₁ h₂ b₁ b₂
+
+/-|
+Now, we define the type `BinTree` using a `Subtype` that states that only trees satisfying the BST invariant are `BinTree`s.
+-/
+def BinTree (β : Type u) := { t : Tree β // BST t }
+
+def BinTree.mk : BinTree β :=
+  ⟨.leaf, .leaf⟩
+
+def BinTree.contains (b : BinTree β) (k : Nat) : Bool :=
+  b.val.contains k
+
+def BinTree.find? (b : BinTree β) (k : Nat) : Option β :=
+  b.val.find? k
+
+def BinTree.insert (b : BinTree β) (k : Nat) (v : β) : BinTree β :=
+  ⟨b.val.insert k v, b.val.bst_insert_of_bst b.property k v⟩
+
+attribute [local simp] BinTree.mk BinTree.contains BinTree.find? BinTree.insert Tree.find? Tree.contains Tree.insert
+
+/-|
+Finally, we prove that `BinTree.find?` and `BinTree.insert` satisfy the map properties.
+-/
+
+theorem BinTree.find_mk {β : Type u} (k : Nat) : (@BinTree.mk β).find? k = none := by
+  simp
+
+theorem BinTree.find_insert (b : BinTree β) (k : Nat) (v : β) : (b.insert k v).find? k = some v := by
+  let ⟨t, h⟩ := b; simp
+  induction t with simp
+  | node left key value right ihl ihr =>
+    by_cases hlt : k < key <;> simp [*]
+    · cases h; apply ihl; assumption
+    · by_cases hgt : key < k <;> simp [*]
+      cases h; apply ihr; assumption
+
+theorem BinTree.find_insert_of_ne (b : BinTree β) (h : k ≠ k') (v : β) : (b.insert k v).find? k' = b.find? k' := by
+  let ⟨t, h⟩ := b; simp
+  induction t with simp
+  | leaf =>
+    split <;> simp <;> split <;> simp
+    have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
+    contradiction
+  | node left key value right ihl ihr =>
+    let .node hl hr bl br := h
+    have ihl := ihl bl
+    have ihr := ihr br
+    by_cases hlt : k < key <;> simp [*]
+    by_cases hlt : key < k <;> simp [*]
+    have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
+    subst key
+    by_cases hlt : k' < k <;> simp [*]
+    by_cases hlt : k  < k' <;> simp [*]
+    have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith
+    contradiction