From ed935ed7a73bc544de2d16fe034b7f0d05fae6ec Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Fri, 1 Apr 2022 19:30:28 -0700 Subject: [PATCH] doc: binary search trees --- doc/examples.md | 1 + doc/examples/bintree.lean | 224 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100644 doc/examples/bintree.lean diff --git a/doc/examples.md b/doc/examples.md index e73b82a167..2460d1aefb 100644 --- a/doc/examples.md +++ b/doc/examples.md @@ -2,6 +2,7 @@ Examples ======== - [Palindromes](examples/palindromes.lean.html) +- [Binary Search Trees](examples/bintree.lean.html) - [A Certified Type Checker](examples/tc.lean.html) - [The Well-Typed Interpreter](examples/interp.lean.html) - [Dependent de Bruijn Indices](examples/deBruijn.lean.html) diff --git a/doc/examples/bintree.lean b/doc/examples/bintree.lean new file mode 100644 index 0000000000..d071834bf0 --- /dev/null +++ b/doc/examples/bintree.lean @@ -0,0 +1,224 @@ +/-| +========================================== +Binary Search Trees +========================================== + +If the type of keys can be totally ordered -- that is, it supports a well-behaved `≤` comparison -- +then maps can be implemented with binary search trees (BSTs). Insert and lookup operations on BSTs take time +proportional to the height of the tree. If the tree is balanced, the operations therefore take logarithmic time. + +This example is based on a similar example found in the ["Sofware Foundations"](https://softwarefoundations.cis.upenn.edu/vfa-current/SearchTree.html) +book (volume 3). +-/ + +/-| +We use `Nat` as the key type in our implementation of BSTs, +since it has a convenient total order with lots of theorems and automation available. +We leave as an exercise to the reader the generalization to arbitrary types. +-/ + +inductive Tree (β : Type v) where + | leaf + | node (left : Tree β) (key : Nat) (value : β) (right : Tree β) + deriving Repr + +/-| +The function `contains` returns `true` iff the given gree contains the key `k`. +-/ +def Tree.contains (t : Tree β) (k : Nat) : Bool := + match t with + | leaf => false + | node left key value right => + if k < key then + left.contains k + else if key < k then + right.contains k + else + true + +/-| +`t.find? k` returns `some v` if `v` is the value bound to key `k` in the tree `t`. It returns `none` otherwise. +-/ +def Tree.find? (t : Tree β) (k : Nat) : Option β := + match t with + | leaf => none + | node left key value right => + if k < key then + left.find? k + else if key < k then + right.find? k + else + some value + +/-| +`t.insert k v` is the map containing all the bindings of `t` along with a binding of `k` to `v`. +-/ +def Tree.insert (t : Tree β) (k : Nat) (v : β) : Tree β := + match t with + | leaf => node leaf k v leaf + | node left key value right => + if k < key then + node (left.insert k v) key value right + else if key < k then + node left key value (right.insert k v) + else + node left k v right +/-| +Let's add a new operation to our tree: converting it to an association list that contains the key--value bindings from the tree stored as pairs. +If that list is sorted by the keys, then any two trees that represent the same map would be converted to the same list. +Here's a function that does so with an in-order traversal of the tree. +-/ +def Tree.toList (t : Tree β) : List (Nat × β) := + match t with + | leaf => [] + | node left key value right => left.toList ++ [(key, value)] ++ right.toList + +#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one" +#eval Tree.leaf.insert 2 "two" |>.insert 3 "three" |>.insert 1 "one" |>.toList + +/-| +The implemention of `Tree.toList` is inefficient because of how it uses the `++` operator. +On a balanced tree its running time is linearithmic, because it does a linear number of +concatentations at each level of the tree. On an unbalanced tree it's quadratic time. +Here's a tail-recursive implementation than runs in linear time, regardless of whether the tree is balanced: +-/ +def Tree.toListTR (t : Tree β) : List (Nat × β) := + go t [] +where + go (t : Tree β) (acc : List (Nat × β)) : List (Nat × β) := + match t with + | leaf => acc + | node left key value right => left.go ((key, value) :: right.go acc) + +/-| +We now prove that `t.toList` and `t.toListTR` return the same list. +The proof is on induction, and as we used the auxiliary function `go` +to define `Tree.toListTR`, we use an auxiliary theorem here too. +-/ +theorem Tree.toList_eq_toListTR (t : Tree β) : t.toList = t.toListTR := by + simp [toListTR, go t []] +where + go (t : Tree β) (acc : List (Nat × β)) : toListTR.go t acc = t.toList ++ acc := by + induction t generalizing acc <;> simp [toListTR.go, toList, *, List.append_assoc] + +/-| +The `[csimp]` annotation instructs the Lean code generator to replace +any `Tree.toList` with `Tree.toListTR` when generating code. +-/ +@[csimp] theorem Tree.toList_eq_toListTR_csimp : @Tree.toList = @Tree.toListTR := by + funext β t + apply toList_eq_toListTR + +/-| +The implementations of `Tree.find?` and `Tree.insert` assume that values of type tree obey the BST invariant: +for any non-empty node with key `k`, all the values of the `left` subtree are less than `k` and all the values +of the right subtree are greater than `k`. But that invariant is not part of the definition of tree. + +So, let's formalize the BST invariant. Here's one way to do so. First, we define a helper `ForallTree` +to express that idea that a predicate holds at every node of a tree: +-/ +inductive ForallTree (p : Nat → β → Prop) : Tree β → Prop + | leaf : ForallTree p .leaf + | node : ForallTree p left → p key value → ForallTree p right → ForallTree p (.node left key value right) + + +/-| +Second, we define the BST invariant: +An empty tree is a BST. +A non-empty tree is a BST if all its left nodes have a lesser key, its right nodes have a greater key, and the left and right subtrees are themselves BSTs. +-/ +inductive BST : Tree β → Prop + | leaf : BST .leaf + | node : + {value : β} → + ForallTree (fun k v => k < key) left → + ForallTree (fun k v => key < k) right → + BST left → BST right → + BST (.node left key value right) + +/-| +We now prove that `Tree.insert` preserves the BST invariant using induction and case analysis. +-/ + +theorem Tree.forall_insert_of_forall (h₁ : ForallTree p t) (h₂ : p key value) : ForallTree p (t.insert key value) := by + induction h₁ with + | leaf => exact .node .leaf h₂ .leaf + | node hl hp hr ihl ihr => + rename Nat => k + simp [insert] + by_cases hlt : key < k <;> simp [hlt] + · exact .node ihl hp hr + · by_cases hgt : k < key <;> simp [hgt] + · exact .node hl hp ihr + · have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith + subst key + exact .node hl h₂ hr + +theorem Tree.bst_insert_of_bst {t : Tree β} (h : BST t) (key : Nat) (value : β) : BST (t.insert key value) := by + induction h with + | leaf => exact .node .leaf .leaf .leaf .leaf + | node h₁ h₂ b₁ b₂ ih₁ ih₂ => + rename Nat => k + simp [insert] + by_cases hlt : key < k <;> simp [*] + · exact .node (forall_insert_of_forall h₁ hlt) h₂ ih₁ b₂ + · by_cases hgt : k < key <;> simp [*] + · exact .node h₁ (forall_insert_of_forall h₂ hgt) b₁ ih₂ + · have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith + subst key + exact .node h₁ h₂ b₁ b₂ + +/-| +Now, we define the type `BinTree` using a `Subtype` that states that only trees satisfying the BST invariant are `BinTree`s. +-/ +def BinTree (β : Type u) := { t : Tree β // BST t } + +def BinTree.mk : BinTree β := + ⟨.leaf, .leaf⟩ + +def BinTree.contains (b : BinTree β) (k : Nat) : Bool := + b.val.contains k + +def BinTree.find? (b : BinTree β) (k : Nat) : Option β := + b.val.find? k + +def BinTree.insert (b : BinTree β) (k : Nat) (v : β) : BinTree β := + ⟨b.val.insert k v, b.val.bst_insert_of_bst b.property k v⟩ + +attribute [local simp] BinTree.mk BinTree.contains BinTree.find? BinTree.insert Tree.find? Tree.contains Tree.insert + +/-| +Finally, we prove that `BinTree.find?` and `BinTree.insert` satisfy the map properties. +-/ + +theorem BinTree.find_mk {β : Type u} (k : Nat) : (@BinTree.mk β).find? k = none := by + simp + +theorem BinTree.find_insert (b : BinTree β) (k : Nat) (v : β) : (b.insert k v).find? k = some v := by + let ⟨t, h⟩ := b; simp + induction t with simp + | node left key value right ihl ihr => + by_cases hlt : k < key <;> simp [*] + · cases h; apply ihl; assumption + · by_cases hgt : key < k <;> simp [*] + cases h; apply ihr; assumption + +theorem BinTree.find_insert_of_ne (b : BinTree β) (h : k ≠ k') (v : β) : (b.insert k v).find? k' = b.find? k' := by + let ⟨t, h⟩ := b; simp + induction t with simp + | leaf => + split <;> simp <;> split <;> simp + have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith + contradiction + | node left key value right ihl ihr => + let .node hl hr bl br := h + have ihl := ihl bl + have ihr := ihr br + by_cases hlt : k < key <;> simp [*] + by_cases hlt : key < k <;> simp [*] + have heq : key = k := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith + subst key + by_cases hlt : k' < k <;> simp [*] + by_cases hlt : k < k' <;> simp [*] + have heq : k = k' := by simp_arith at *; apply Nat.le_antisymm <;> assumption -- TODO: improve with linarith + contradiction