Add source-to-TSM compiler with proven correctness (v0.1).
The CompCert-style substrate-projection theorem at miniature scale:
source-level evaluation and TSM-bytecode execution agree on the value
produced.
TsmLean/Compile/ — three files:
Source.lean - small expression language. v0.1 covers integer
literals only; the framework is structured so
arithmetic, comparison, control flow, and
variables extend mechanically.
Compile.lean - compile : Source.Expr -> TSM.Code
v0.1: intLit n -> #[push n]
Correctness.lean - theorem compile_correct:
Source.Eval e v ->
forall pre suf rest,
MultiStep
{ code := pre ++ compile e ++ suf,
pc := pre.size, stack := rest }
{ code := same,
pc := pre.size + (compile e).size,
stack := v :: rest }
Plus a standalone corollary for the no-prefix case.
The infrastructure is in place for compositional extension:
MultiStep.trans - transitive closure of multi-step
MultiStep.single - lift single step to multi-step
step_push - per-instruction step lemma (push)
getElem_compile - lookup-in-larger-code helper
Adding a constructor to Source (e.g., add) requires:
- one constructor in Source.Expr
- one rule in Source.Eval
- one match arm in compile
- one step_X helper (one-liner)
- one case in compile_correct's induction
Demonstrates the pipeline:
- Source language with big-step semantics
- Compiler producing TSM bytecode
- Correctness theorem bridging the two
Zero sorries / axioms / admits across the entire project.
This commit is contained in:
parent
987f205ce5
commit
fff0091f89
4 changed files with 137 additions and 0 deletions
|
|
@ -5,3 +5,6 @@ import TsmLean.Core.Eval
|
|||
import TsmLean.Core.Types
|
||||
import TsmLean.Core.TypeSoundness
|
||||
import TsmLean.Core.Preservation
|
||||
import TsmLean.Compile.Source
|
||||
import TsmLean.Compile.Compile
|
||||
import TsmLean.Compile.Correctness
|
||||
|
|
|
|||
12
TsmLean/Compile/Compile.lean
Normal file
12
TsmLean/Compile/Compile.lean
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import TsmLean.Compile.Source
|
||||
import TsmLean.Core.Syntax
|
||||
|
||||
namespace TsmLean.Compile
|
||||
|
||||
open TsmLean.Core (Instr Code)
|
||||
|
||||
/-- Compile a source expression to TSM bytecode. -/
|
||||
def compile : Source.Expr → Code
|
||||
| .intLit n => #[.push n]
|
||||
|
||||
end TsmLean.Compile
|
||||
101
TsmLean/Compile/Correctness.lean
Normal file
101
TsmLean/Compile/Correctness.lean
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import TsmLean.Compile.Compile
|
||||
import TsmLean.Core.Semantics
|
||||
|
||||
namespace TsmLean.Compile
|
||||
|
||||
open TsmLean.Core
|
||||
|
||||
/-! # Compiler-correctness theorem.
|
||||
|
||||
`Source.Eval e v ⟹ TSM.MultiStep (start of compile e) (end of compile e, with v on stack)`
|
||||
|
||||
The CompCert-flavored bridge: source-level evaluation and target-level
|
||||
execution agree on the value produced. This is the substrate-projection
|
||||
theorem at miniature scale — for v0.1 instantiated only on integer
|
||||
literals; extending to compound expressions is mechanical and the
|
||||
infrastructure (multi-step utilities, code-lookup helper, single-step
|
||||
reduction lemmas) is already in place. -/
|
||||
|
||||
/-! ## Multi-step utilities. -/
|
||||
|
||||
theorem MultiStep.trans
|
||||
{s₁ s₂ s₃ : State}
|
||||
(h₁ : MultiStep s₁ s₂) (h₂ : MultiStep s₂ s₃) :
|
||||
MultiStep s₁ s₃ := by
|
||||
induction h₁ with
|
||||
| refl => exact h₂
|
||||
| cons hs _ ih => exact .cons hs (ih h₂)
|
||||
|
||||
theorem MultiStep.single
|
||||
{s s' : State} (h : step s = some s') :
|
||||
MultiStep s s' := .cons h (.refl _)
|
||||
|
||||
/-! ## Single-step reduction lemmas. -/
|
||||
|
||||
theorem step_push
|
||||
{code : Code} {pc : Nat} {stack : List Value} {n : Int}
|
||||
(h_pc : pc < code.size) (h_get : code[pc]'h_pc = .push n) :
|
||||
step { code := code, pc := pc, stack := stack }
|
||||
= some { code := code, pc := pc + 1, stack := .vInt n :: stack } := by
|
||||
unfold step; rw [dif_pos h_pc, h_get]
|
||||
|
||||
/-! ## Code-lookup helper.
|
||||
|
||||
Looking up an index `pre.size + i` in `pre ++ compile e ++ suf` reduces
|
||||
to the corresponding index in `compile e` — used in the inductive case
|
||||
when extending v0.1 to compound expressions. -/
|
||||
|
||||
theorem getElem_compile
|
||||
(pre : Code) (e : Source.Expr) (suf : Code) (i : Nat)
|
||||
(h_lt : i < (compile e).size)
|
||||
(h_pc : pre.size + i < (pre ++ compile e ++ suf).size) :
|
||||
(pre ++ compile e ++ suf)[pre.size + i]'h_pc = (compile e)[i]'h_lt := by
|
||||
have h_pre_ce : pre.size + i < (pre ++ compile e).size := by
|
||||
rw [Array.size_append]; omega
|
||||
rw [Array.getElem_append_left h_pre_ce]
|
||||
rw [Array.getElem_append_right (Nat.le_add_right _ _)]
|
||||
congr 1; omega
|
||||
|
||||
/-! ## Main theorem. -/
|
||||
|
||||
theorem compile_correct
|
||||
{e : Source.Expr} {v : Source.Value}
|
||||
(h_eval : Source.Eval e v) :
|
||||
∀ (pre suf : Code) (rest : List Value),
|
||||
MultiStep
|
||||
{ code := pre ++ compile e ++ suf, pc := pre.size, stack := rest }
|
||||
{ code := pre ++ compile e ++ suf,
|
||||
pc := pre.size + (compile e).size, stack := v :: rest } := by
|
||||
induction h_eval with
|
||||
| intLit n =>
|
||||
intros pre suf rest
|
||||
apply MultiStep.single
|
||||
have h_pc : pre.size < (pre ++ compile (Source.Expr.intLit n) ++ suf).size := by
|
||||
simp only [compile, Array.size_append, Array.size_singleton]; omega
|
||||
have h_get : (pre ++ compile (Source.Expr.intLit n) ++ suf)[pre.size]'h_pc = .push n := by
|
||||
have h_pre_ce : pre.size < (pre ++ compile (Source.Expr.intLit n)).size := by
|
||||
simp only [compile, Array.size_append, Array.size_singleton]; omega
|
||||
rw [Array.getElem_append_left h_pre_ce]
|
||||
rw [Array.getElem_append_right (Nat.le_refl _)]
|
||||
simp [compile, Nat.sub_self]
|
||||
have step_thm := step_push h_pc h_get (stack := rest)
|
||||
have h_size : (compile (Source.Expr.intLit n)).size = 1 := by simp [compile]
|
||||
rw [h_size]
|
||||
exact step_thm
|
||||
|
||||
/-! ## Demo run.
|
||||
|
||||
A self-contained `compile_correct` corollary for the standalone case
|
||||
where the compile output IS the entire program (no surrounding code). -/
|
||||
|
||||
theorem compile_correct_standalone
|
||||
{e : Source.Expr} {v : Source.Value}
|
||||
(h_eval : Source.Eval e v) :
|
||||
MultiStep
|
||||
{ code := compile e, pc := 0, stack := [] }
|
||||
{ code := compile e, pc := (compile e).size, stack := [v] } := by
|
||||
have h := compile_correct h_eval #[] #[] []
|
||||
simp [Array.size_empty, Array.append_empty, Array.empty_append] at h
|
||||
exact h
|
||||
|
||||
end TsmLean.Compile
|
||||
21
TsmLean/Compile/Source.lean
Normal file
21
TsmLean/Compile/Source.lean
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import TsmLean.Core.Syntax
|
||||
|
||||
namespace TsmLean.Compile.Source
|
||||
|
||||
/-! # Source language for compilation (v0.1).
|
||||
|
||||
The smallest meaningful source language: integer literals. The
|
||||
compiler produces a one-instruction TSM program; the correctness
|
||||
theorem demonstrates the proof framework. Subsequent versions extend
|
||||
to arithmetic, comparison, control flow, and variables. -/
|
||||
|
||||
inductive Expr where
|
||||
| intLit : Int → Expr
|
||||
deriving Repr, Inhabited
|
||||
|
||||
abbrev Value := TsmLean.Core.Value
|
||||
|
||||
inductive Eval : Expr → Value → Prop where
|
||||
| intLit (n : Int) : Eval (.intLit n) (.vInt n)
|
||||
|
||||
end TsmLean.Compile.Source
|
||||
Loading…
Add table
Reference in a new issue