From ec65229050f2298297415d52d6b943479e01047c Mon Sep 17 00:00:00 2001 From: Maximus Gorog Date: Sun, 10 May 2026 05:53:39 -0600 Subject: [PATCH] Extend source-to-TSM compiler with addition (v0.2). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Source.Expr now has intLit and add. Compile and correctness theorem both extend. The add case of compile_correct exercises the compositional structure: - IH on e1 (with extended suffix) gives the multistep for the first operand's evaluation. - IH on e2 (with extended prefix) gives the multistep for the second. - A single .add step at the boundary closes the trace. - Each intermediate state's PC is computed via array-size arithmetic threaded through omega. New supporting lemmas: step_add - per-instruction step for .add compile_add_get_op - the instruction at the end of compile (.add e1 e2) is .add. Extracted so the dependent-rewrite issue with array bound proofs is contained in one place. Engineering knowledge gained (recurring patterns when extending): - Array.getElem_append_left/right take the bound as an explicit positional arg, not via (h := ...). - rw on indices that appear in dependent bound proofs fails with "motive not type correct"; factor the lookup into a separate lemma. - convert tactic appears not to be available; rw + exact substitutes. - simp + omega closes most arithmetic on Array.size after expansion. - step lemmas with implicit args (a, b) need explicit (a := _) in calls where context doesn't determine them. Adding a constructor still follows the v0.1 recipe — one Source constructor, one Eval rule, one compile arm, one step_X helper, one compile_X_get_op lemma, one case in compile_correct's induction. Each case is ~25-40 lines of proof. Zero sorries / axioms / admits. --- TsmLean/Compile/Compile.lean | 3 +- TsmLean/Compile/Correctness.lean | 68 ++++++++++++++++++++++++++++++++ TsmLean/Compile/Source.lean | 13 +++--- 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/TsmLean/Compile/Compile.lean b/TsmLean/Compile/Compile.lean index 6128bda..fe3d36d 100644 --- a/TsmLean/Compile/Compile.lean +++ b/TsmLean/Compile/Compile.lean @@ -7,6 +7,7 @@ open TsmLean.Core (Instr Code) /-- Compile a source expression to TSM bytecode. -/ def compile : Source.Expr → Code - | .intLit n => #[.push n] + | .intLit n => #[.push n] + | .add e1 e2 => compile e1 ++ compile e2 ++ #[.add] end TsmLean.Compile diff --git a/TsmLean/Compile/Correctness.lean b/TsmLean/Compile/Correctness.lean index 3b5feab..22ad1c9 100644 --- a/TsmLean/Compile/Correctness.lean +++ b/TsmLean/Compile/Correctness.lean @@ -39,6 +39,27 @@ theorem step_push = some { code := code, pc := pc + 1, stack := .vInt n :: stack } := by unfold step; rw [dif_pos h_pc, h_get] +theorem step_add + {code : Code} {pc : Nat} {a b : Int} {rest : List Value} + (h_pc : pc < code.size) (h_get : code[pc]'h_pc = .add) : + step { code := code, pc := pc, stack := .vInt b :: .vInt a :: rest } + = some { code := code, pc := pc + 1, stack := .vInt (a + b) :: rest } := by + unfold step; rw [dif_pos h_pc, h_get] + +/-! ## Compile-output lookup helpers. -/ + +/-- The instruction at the end of `compile (.add e1 e2)` (right after both + sub-compiled segments) is `.add`. -/ +theorem compile_add_get_op + (e1 e2 : Source.Expr) + (h : (compile e1).size + (compile e2).size < (compile (Source.Expr.add e1 e2)).size) : + (compile (Source.Expr.add e1 e2))[(compile e1).size + (compile e2).size]'h = .add := by + show ((compile e1 ++ compile e2) ++ #[Instr.add])[(compile e1).size + (compile e2).size]'h = .add + have hle : (compile e1 ++ compile e2).size ≤ (compile e1).size + (compile e2).size := + Nat.le_of_eq Array.size_append + rw [Array.getElem_append_right hle] + simp [Array.size_append, Nat.sub_self] + /-! ## Code-lookup helper. Looking up an index `pre.size + i` in `pre ++ compile e ++ suf` reduces @@ -82,6 +103,53 @@ theorem compile_correct have h_size : (compile (Source.Expr.intLit n)).size = 1 := by simp [compile] rw [h_size] exact step_thm + | @add e1 e2 a b _ _ ih1 ih2 => + intros pre suf rest + -- Step A: compile e1 + have stepA := ih1 pre (compile e2 ++ #[.add] ++ suf) rest + -- Step B: compile e2 (with prefix extended) + have stepB := ih2 (pre ++ compile e1) (#[.add] ++ suf) (.vInt a :: rest) + -- Code rearrangements + have h_code_A : pre ++ compile e1 ++ (compile e2 ++ #[Instr.add] ++ suf) + = pre ++ compile (Source.Expr.add e1 e2) ++ suf := by + simp only [compile, Array.append_assoc] + have h_code_B : pre ++ compile e1 ++ compile e2 ++ (#[Instr.add] ++ suf) + = pre ++ compile (Source.Expr.add e1 e2) ++ suf := by + simp only [compile, Array.append_assoc] + rw [h_code_A] at stepA + rw [show (pre ++ compile e1).size = pre.size + (compile e1).size from + by simp [Array.size_append]] at stepB + rw [h_code_B] at stepB + apply MultiStep.trans stepA + apply MultiStep.trans stepB + apply MultiStep.single + -- Final .add step + have h_total_size : (compile (Source.Expr.add e1 e2)).size + = (compile e1).size + (compile e2).size + 1 := by + show (compile e1 ++ compile e2 ++ #[Instr.add]).size + = (compile e1).size + (compile e2).size + 1 + simp [Array.size_append]; omega + have h_op_pc : pre.size + (compile e1).size + (compile e2).size + < (pre ++ compile (Source.Expr.add e1 e2) ++ suf).size := by + simp only [Array.size_append, h_total_size]; omega + have h_in_comp : (compile e1).size + (compile e2).size + < (compile (Source.Expr.add e1 e2)).size := by + simp [compile, Array.size_append] + have h_full_pc : pre.size + ((compile e1).size + (compile e2).size) + < (pre ++ compile (Source.Expr.add e1 e2) ++ suf).size := by + simp only [Array.size_append, h_total_size]; omega + have h_op_get : (pre ++ compile (Source.Expr.add e1 e2) ++ suf)[pre.size + ((compile e1).size + (compile e2).size)]'h_full_pc = .add := by + rw [getElem_compile pre (Source.Expr.add e1 e2) suf + ((compile e1).size + (compile e2).size) h_in_comp h_full_pc] + exact compile_add_get_op e1 e2 h_in_comp + have h_step := step_add (a := a) (b := b) (rest := rest) h_full_pc h_op_get + have h_pre_pc : pre.size + (compile e1).size + (compile e2).size + = pre.size + ((compile e1).size + (compile e2).size) := by omega + have h_post_pc : pre.size + ((compile e1).size + (compile e2).size) + 1 + = pre.size + (compile (Source.Expr.add e1 e2)).size := by + rw [h_total_size]; omega + rw [h_pre_pc, ← h_post_pc] + exact h_step /-! ## Demo run. diff --git a/TsmLean/Compile/Source.lean b/TsmLean/Compile/Source.lean index 13bcdc4..cb63650 100644 --- a/TsmLean/Compile/Source.lean +++ b/TsmLean/Compile/Source.lean @@ -2,20 +2,23 @@ import TsmLean.Core.Syntax namespace TsmLean.Compile.Source -/-! # Source language for compilation (v0.1). +/-! # Source language for compilation (v0.2). -The smallest meaningful source language: integer literals. The -compiler produces a one-instruction TSM program; the correctness -theorem demonstrates the proof framework. Subsequent versions extend -to arithmetic, comparison, control flow, and variables. -/ +Integer literals + addition. The minimal "tree of operations" that +exercises the compositional structure of the correctness proof. -/ inductive Expr where | intLit : Int → Expr + | add : Expr → Expr → Expr deriving Repr, Inhabited abbrev Value := TsmLean.Core.Value inductive Eval : Expr → Value → Prop where | intLit (n : Int) : Eval (.intLit n) (.vInt n) + | add {e1 e2 a b} + (h1 : Eval e1 (.vInt a)) + (h2 : Eval e2 (.vInt b)) : + Eval (.add e1 e2) (.vInt (a + b)) end TsmLean.Compile.Source