Extend source-to-TSM compiler with addition (v0.2).

Source.Expr now has intLit and add. Compile and correctness theorem
both extend.

The add case of compile_correct exercises the compositional structure:
  - IH on e1 (with extended suffix) gives the multistep for the first
    operand's evaluation.
  - IH on e2 (with extended prefix) gives the multistep for the second.
  - A single .add step at the boundary closes the trace.
  - Each intermediate state's PC is computed via array-size arithmetic
    threaded through omega.

New supporting lemmas:
  step_add               - per-instruction step for .add
  compile_add_get_op     - the instruction at the end of compile (.add e1 e2)
                           is .add. Extracted so the dependent-rewrite issue
                           with array bound proofs is contained in one place.

Engineering knowledge gained (recurring patterns when extending):
  - Array.getElem_append_left/right take the bound as an explicit positional
    arg, not via (h := ...).
  - rw on indices that appear in dependent bound proofs fails with "motive
    not type correct"; factor the lookup into a separate lemma.
  - convert tactic appears not to be available; rw + exact substitutes.
  - simp + omega closes most arithmetic on Array.size after expansion.
  - step lemmas with implicit args (a, b) need explicit (a := _) in calls
    where context doesn't determine them.

Adding a constructor still follows the v0.1 recipe — one Source
constructor, one Eval rule, one compile arm, one step_X helper, one
compile_X_get_op lemma, one case in compile_correct's induction. Each
case is ~25-40 lines of proof.

Zero sorries / axioms / admits.
This commit is contained in:
Maximus Gorog 2026-05-10 05:53:39 -06:00
parent fff0091f89
commit ec65229050
3 changed files with 78 additions and 6 deletions

View file

@ -7,6 +7,7 @@ open TsmLean.Core (Instr Code)
/-- Compile a source expression to TSM bytecode. -/
def compile : Source.Expr → Code
| .intLit n => #[.push n]
| .intLit n => #[.push n]
| .add e1 e2 => compile e1 ++ compile e2 ++ #[.add]
end TsmLean.Compile

View file

@ -39,6 +39,27 @@ theorem step_push
= some { code := code, pc := pc + 1, stack := .vInt n :: stack } := by
unfold step; rw [dif_pos h_pc, h_get]
theorem step_add
{code : Code} {pc : Nat} {a b : Int} {rest : List Value}
(h_pc : pc < code.size) (h_get : code[pc]'h_pc = .add) :
step { code := code, pc := pc, stack := .vInt b :: .vInt a :: rest }
= some { code := code, pc := pc + 1, stack := .vInt (a + b) :: rest } := by
unfold step; rw [dif_pos h_pc, h_get]
/-! ## Compile-output lookup helpers. -/
/-- The instruction at the end of `compile (.add e1 e2)` (right after both
sub-compiled segments) is `.add`. -/
theorem compile_add_get_op
(e1 e2 : Source.Expr)
(h : (compile e1).size + (compile e2).size < (compile (Source.Expr.add e1 e2)).size) :
(compile (Source.Expr.add e1 e2))[(compile e1).size + (compile e2).size]'h = .add := by
show ((compile e1 ++ compile e2) ++ #[Instr.add])[(compile e1).size + (compile e2).size]'h = .add
have hle : (compile e1 ++ compile e2).size ≤ (compile e1).size + (compile e2).size :=
Nat.le_of_eq Array.size_append
rw [Array.getElem_append_right hle]
simp [Array.size_append, Nat.sub_self]
/-! ## Code-lookup helper.
Looking up an index `pre.size + i` in `pre ++ compile e ++ suf` reduces
@ -82,6 +103,53 @@ theorem compile_correct
have h_size : (compile (Source.Expr.intLit n)).size = 1 := by simp [compile]
rw [h_size]
exact step_thm
| @add e1 e2 a b _ _ ih1 ih2 =>
intros pre suf rest
-- Step A: compile e1
have stepA := ih1 pre (compile e2 ++ #[.add] ++ suf) rest
-- Step B: compile e2 (with prefix extended)
have stepB := ih2 (pre ++ compile e1) (#[.add] ++ suf) (.vInt a :: rest)
-- Code rearrangements
have h_code_A : pre ++ compile e1 ++ (compile e2 ++ #[Instr.add] ++ suf)
= pre ++ compile (Source.Expr.add e1 e2) ++ suf := by
simp only [compile, Array.append_assoc]
have h_code_B : pre ++ compile e1 ++ compile e2 ++ (#[Instr.add] ++ suf)
= pre ++ compile (Source.Expr.add e1 e2) ++ suf := by
simp only [compile, Array.append_assoc]
rw [h_code_A] at stepA
rw [show (pre ++ compile e1).size = pre.size + (compile e1).size from
by simp [Array.size_append]] at stepB
rw [h_code_B] at stepB
apply MultiStep.trans stepA
apply MultiStep.trans stepB
apply MultiStep.single
-- Final .add step
have h_total_size : (compile (Source.Expr.add e1 e2)).size
= (compile e1).size + (compile e2).size + 1 := by
show (compile e1 ++ compile e2 ++ #[Instr.add]).size
= (compile e1).size + (compile e2).size + 1
simp [Array.size_append]; omega
have h_op_pc : pre.size + (compile e1).size + (compile e2).size
< (pre ++ compile (Source.Expr.add e1 e2) ++ suf).size := by
simp only [Array.size_append, h_total_size]; omega
have h_in_comp : (compile e1).size + (compile e2).size
< (compile (Source.Expr.add e1 e2)).size := by
simp [compile, Array.size_append]
have h_full_pc : pre.size + ((compile e1).size + (compile e2).size)
< (pre ++ compile (Source.Expr.add e1 e2) ++ suf).size := by
simp only [Array.size_append, h_total_size]; omega
have h_op_get : (pre ++ compile (Source.Expr.add e1 e2) ++ suf)[pre.size + ((compile e1).size + (compile e2).size)]'h_full_pc = .add := by
rw [getElem_compile pre (Source.Expr.add e1 e2) suf
((compile e1).size + (compile e2).size) h_in_comp h_full_pc]
exact compile_add_get_op e1 e2 h_in_comp
have h_step := step_add (a := a) (b := b) (rest := rest) h_full_pc h_op_get
have h_pre_pc : pre.size + (compile e1).size + (compile e2).size
= pre.size + ((compile e1).size + (compile e2).size) := by omega
have h_post_pc : pre.size + ((compile e1).size + (compile e2).size) + 1
= pre.size + (compile (Source.Expr.add e1 e2)).size := by
rw [h_total_size]; omega
rw [h_pre_pc, ← h_post_pc]
exact h_step
/-! ## Demo run.

View file

@ -2,20 +2,23 @@ import TsmLean.Core.Syntax
namespace TsmLean.Compile.Source
/-! # Source language for compilation (v0.1).
/-! # Source language for compilation (v0.2).
The smallest meaningful source language: integer literals. The
compiler produces a one-instruction TSM program; the correctness
theorem demonstrates the proof framework. Subsequent versions extend
to arithmetic, comparison, control flow, and variables. -/
Integer literals + addition. The minimal "tree of operations" that
exercises the compositional structure of the correctness proof. -/
inductive Expr where
| intLit : Int → Expr
| add : Expr → Expr → Expr
deriving Repr, Inhabited
abbrev Value := TsmLean.Core.Value
inductive Eval : Expr → Value → Prop where
| intLit (n : Int) : Eval (.intLit n) (.vInt n)
| add {e1 e2 a b}
(h1 : Eval e1 (.vInt a))
(h2 : Eval e2 (.vInt b)) :
Eval (.add e1 e2) (.vInt (a + b))
end TsmLean.Compile.Source