Add source-to-TSM compiler with proven correctness (v0.1).

The CompCert-style substrate-projection theorem at miniature scale:
source-level evaluation and TSM-bytecode execution agree on the value
produced.

TsmLean/Compile/ — three files:

  Source.lean       - small expression language. v0.1 covers integer
                      literals only; the framework is structured so
                      arithmetic, comparison, control flow, and
                      variables extend mechanically.

  Compile.lean      - compile : Source.Expr -> TSM.Code
                      v0.1: intLit n -> #[push n]

  Correctness.lean  - theorem compile_correct:
                        Source.Eval e v ->
                        forall pre suf rest,
                          MultiStep
                            { code := pre ++ compile e ++ suf,
                              pc := pre.size, stack := rest }
                            { code := same,
                              pc := pre.size + (compile e).size,
                              stack := v :: rest }
                      Plus a standalone corollary for the no-prefix case.

The infrastructure is in place for compositional extension:

  MultiStep.trans       - transitive closure of multi-step
  MultiStep.single      - lift single step to multi-step
  step_push             - per-instruction step lemma (push)
  getElem_compile       - lookup-in-larger-code helper

Adding a constructor to Source (e.g., add) requires:
  - one constructor in Source.Expr
  - one rule in Source.Eval
  - one match arm in compile
  - one step_X helper (one-liner)
  - one case in compile_correct's induction

Demonstrates the pipeline:
  - Source language with big-step semantics
  - Compiler producing TSM bytecode
  - Correctness theorem bridging the two

Zero sorries / axioms / admits across the entire project.
This commit is contained in:
Maximus Gorog 2026-05-10 05:38:01 -06:00
parent 987f205ce5
commit fff0091f89
4 changed files with 137 additions and 0 deletions

View file

@ -5,3 +5,6 @@ import TsmLean.Core.Eval
import TsmLean.Core.Types
import TsmLean.Core.TypeSoundness
import TsmLean.Core.Preservation
import TsmLean.Compile.Source
import TsmLean.Compile.Compile
import TsmLean.Compile.Correctness

View file

@ -0,0 +1,12 @@
import TsmLean.Compile.Source
import TsmLean.Core.Syntax
namespace TsmLean.Compile
open TsmLean.Core (Instr Code)
/-- Compile a source expression to TSM bytecode. -/
def compile : Source.Expr → Code
| .intLit n => #[.push n]
end TsmLean.Compile

View file

@ -0,0 +1,101 @@
import TsmLean.Compile.Compile
import TsmLean.Core.Semantics
namespace TsmLean.Compile
open TsmLean.Core
/-! # Compiler-correctness theorem.
`Source.Eval e v ⟹ TSM.MultiStep (start of compile e) (end of compile e, with v on stack)`
The CompCert-flavored bridge: source-level evaluation and target-level
execution agree on the value produced. This is the substrate-projection
theorem at miniature scale — for v0.1 instantiated only on integer
literals; extending to compound expressions is mechanical and the
infrastructure (multi-step utilities, code-lookup helper, single-step
reduction lemmas) is already in place. -/
/-! ## Multi-step utilities. -/
theorem MultiStep.trans
{s₁ s₂ s₃ : State}
(h₁ : MultiStep s₁ s₂) (h₂ : MultiStep s₂ s₃) :
MultiStep s₁ s₃ := by
induction h₁ with
| refl => exact h₂
| cons hs _ ih => exact .cons hs (ih h₂)
theorem MultiStep.single
{s s' : State} (h : step s = some s') :
MultiStep s s' := .cons h (.refl _)
/-! ## Single-step reduction lemmas. -/
theorem step_push
{code : Code} {pc : Nat} {stack : List Value} {n : Int}
(h_pc : pc < code.size) (h_get : code[pc]'h_pc = .push n) :
step { code := code, pc := pc, stack := stack }
= some { code := code, pc := pc + 1, stack := .vInt n :: stack } := by
unfold step; rw [dif_pos h_pc, h_get]
/-! ## Code-lookup helper.
Looking up an index `pre.size + i` in `pre ++ compile e ++ suf` reduces
to the corresponding index in `compile e` — used in the inductive case
when extending v0.1 to compound expressions. -/
theorem getElem_compile
(pre : Code) (e : Source.Expr) (suf : Code) (i : Nat)
(h_lt : i < (compile e).size)
(h_pc : pre.size + i < (pre ++ compile e ++ suf).size) :
(pre ++ compile e ++ suf)[pre.size + i]'h_pc = (compile e)[i]'h_lt := by
have h_pre_ce : pre.size + i < (pre ++ compile e).size := by
rw [Array.size_append]; omega
rw [Array.getElem_append_left h_pre_ce]
rw [Array.getElem_append_right (Nat.le_add_right _ _)]
congr 1; omega
/-! ## Main theorem. -/
theorem compile_correct
{e : Source.Expr} {v : Source.Value}
(h_eval : Source.Eval e v) :
∀ (pre suf : Code) (rest : List Value),
MultiStep
{ code := pre ++ compile e ++ suf, pc := pre.size, stack := rest }
{ code := pre ++ compile e ++ suf,
pc := pre.size + (compile e).size, stack := v :: rest } := by
induction h_eval with
| intLit n =>
intros pre suf rest
apply MultiStep.single
have h_pc : pre.size < (pre ++ compile (Source.Expr.intLit n) ++ suf).size := by
simp only [compile, Array.size_append, Array.size_singleton]; omega
have h_get : (pre ++ compile (Source.Expr.intLit n) ++ suf)[pre.size]'h_pc = .push n := by
have h_pre_ce : pre.size < (pre ++ compile (Source.Expr.intLit n)).size := by
simp only [compile, Array.size_append, Array.size_singleton]; omega
rw [Array.getElem_append_left h_pre_ce]
rw [Array.getElem_append_right (Nat.le_refl _)]
simp [compile, Nat.sub_self]
have step_thm := step_push h_pc h_get (stack := rest)
have h_size : (compile (Source.Expr.intLit n)).size = 1 := by simp [compile]
rw [h_size]
exact step_thm
/-! ## Demo run.
A self-contained `compile_correct` corollary for the standalone case
where the compile output IS the entire program (no surrounding code). -/
theorem compile_correct_standalone
{e : Source.Expr} {v : Source.Value}
(h_eval : Source.Eval e v) :
MultiStep
{ code := compile e, pc := 0, stack := [] }
{ code := compile e, pc := (compile e).size, stack := [v] } := by
have h := compile_correct h_eval #[] #[] []
simp [Array.size_empty, Array.append_empty, Array.empty_append] at h
exact h
end TsmLean.Compile

View file

@ -0,0 +1,21 @@
import TsmLean.Core.Syntax
namespace TsmLean.Compile.Source
/-! # Source language for compilation (v0.1).
The smallest meaningful source language: integer literals. The
compiler produces a one-instruction TSM program; the correctness
theorem demonstrates the proof framework. Subsequent versions extend
to arithmetic, comparison, control flow, and variables. -/
inductive Expr where
| intLit : Int → Expr
deriving Repr, Inhabited
abbrev Value := TsmLean.Core.Value
inductive Eval : Expr → Value → Prop where
| intLit (n : Int) : Eval (.intLit n) (.vInt n)
end TsmLean.Compile.Source