diff --git a/TsmLean.lean b/TsmLean.lean index afc15c8..934dbf4 100644 --- a/TsmLean.lean +++ b/TsmLean.lean @@ -5,3 +5,6 @@ import TsmLean.Core.Eval import TsmLean.Core.Types import TsmLean.Core.TypeSoundness import TsmLean.Core.Preservation +import TsmLean.Compile.Source +import TsmLean.Compile.Compile +import TsmLean.Compile.Correctness diff --git a/TsmLean/Compile/Compile.lean b/TsmLean/Compile/Compile.lean new file mode 100644 index 0000000..6128bda --- /dev/null +++ b/TsmLean/Compile/Compile.lean @@ -0,0 +1,12 @@ +import TsmLean.Compile.Source +import TsmLean.Core.Syntax + +namespace TsmLean.Compile + +open TsmLean.Core (Instr Code) + +/-- Compile a source expression to TSM bytecode. -/ +def compile : Source.Expr → Code + | .intLit n => #[.push n] + +end TsmLean.Compile diff --git a/TsmLean/Compile/Correctness.lean b/TsmLean/Compile/Correctness.lean new file mode 100644 index 0000000..3b5feab --- /dev/null +++ b/TsmLean/Compile/Correctness.lean @@ -0,0 +1,101 @@ +import TsmLean.Compile.Compile +import TsmLean.Core.Semantics + +namespace TsmLean.Compile + +open TsmLean.Core + +/-! # Compiler-correctness theorem. + + `Source.Eval e v ⟹ TSM.MultiStep (start of compile e) (end of compile e, with v on stack)` + +The CompCert-flavored bridge: source-level evaluation and target-level +execution agree on the value produced. This is the substrate-projection +theorem at miniature scale — for v0.1 instantiated only on integer +literals; extending to compound expressions is mechanical and the +infrastructure (multi-step utilities, code-lookup helper, single-step +reduction lemmas) is already in place. -/ + +/-! ## Multi-step utilities. -/ + +theorem MultiStep.trans + {s₁ s₂ s₃ : State} + (h₁ : MultiStep s₁ s₂) (h₂ : MultiStep s₂ s₃) : + MultiStep s₁ s₃ := by + induction h₁ with + | refl => exact h₂ + | cons hs _ ih => exact .cons hs (ih h₂) + +theorem MultiStep.single + {s s' : State} (h : step s = some s') : + MultiStep s s' := .cons h (.refl _) + +/-! ## Single-step reduction lemmas. -/ + +theorem step_push + {code : Code} {pc : Nat} {stack : List Value} {n : Int} + (h_pc : pc < code.size) (h_get : code[pc]'h_pc = .push n) : + step { code := code, pc := pc, stack := stack } + = some { code := code, pc := pc + 1, stack := .vInt n :: stack } := by + unfold step; rw [dif_pos h_pc, h_get] + +/-! ## Code-lookup helper. + +Looking up an index `pre.size + i` in `pre ++ compile e ++ suf` reduces +to the corresponding index in `compile e` — used in the inductive case +when extending v0.1 to compound expressions. -/ + +theorem getElem_compile + (pre : Code) (e : Source.Expr) (suf : Code) (i : Nat) + (h_lt : i < (compile e).size) + (h_pc : pre.size + i < (pre ++ compile e ++ suf).size) : + (pre ++ compile e ++ suf)[pre.size + i]'h_pc = (compile e)[i]'h_lt := by + have h_pre_ce : pre.size + i < (pre ++ compile e).size := by + rw [Array.size_append]; omega + rw [Array.getElem_append_left h_pre_ce] + rw [Array.getElem_append_right (Nat.le_add_right _ _)] + congr 1; omega + +/-! ## Main theorem. -/ + +theorem compile_correct + {e : Source.Expr} {v : Source.Value} + (h_eval : Source.Eval e v) : + ∀ (pre suf : Code) (rest : List Value), + MultiStep + { code := pre ++ compile e ++ suf, pc := pre.size, stack := rest } + { code := pre ++ compile e ++ suf, + pc := pre.size + (compile e).size, stack := v :: rest } := by + induction h_eval with + | intLit n => + intros pre suf rest + apply MultiStep.single + have h_pc : pre.size < (pre ++ compile (Source.Expr.intLit n) ++ suf).size := by + simp only [compile, Array.size_append, Array.size_singleton]; omega + have h_get : (pre ++ compile (Source.Expr.intLit n) ++ suf)[pre.size]'h_pc = .push n := by + have h_pre_ce : pre.size < (pre ++ compile (Source.Expr.intLit n)).size := by + simp only [compile, Array.size_append, Array.size_singleton]; omega + rw [Array.getElem_append_left h_pre_ce] + rw [Array.getElem_append_right (Nat.le_refl _)] + simp [compile, Nat.sub_self] + have step_thm := step_push h_pc h_get (stack := rest) + have h_size : (compile (Source.Expr.intLit n)).size = 1 := by simp [compile] + rw [h_size] + exact step_thm + +/-! ## Demo run. + +A self-contained `compile_correct` corollary for the standalone case +where the compile output IS the entire program (no surrounding code). -/ + +theorem compile_correct_standalone + {e : Source.Expr} {v : Source.Value} + (h_eval : Source.Eval e v) : + MultiStep + { code := compile e, pc := 0, stack := [] } + { code := compile e, pc := (compile e).size, stack := [v] } := by + have h := compile_correct h_eval #[] #[] [] + simp [Array.size_empty, Array.append_empty, Array.empty_append] at h + exact h + +end TsmLean.Compile diff --git a/TsmLean/Compile/Source.lean b/TsmLean/Compile/Source.lean new file mode 100644 index 0000000..13bcdc4 --- /dev/null +++ b/TsmLean/Compile/Source.lean @@ -0,0 +1,21 @@ +import TsmLean.Core.Syntax + +namespace TsmLean.Compile.Source + +/-! # Source language for compilation (v0.1). + +The smallest meaningful source language: integer literals. The +compiler produces a one-instruction TSM program; the correctness +theorem demonstrates the proof framework. Subsequent versions extend +to arithmetic, comparison, control flow, and variables. -/ + +inductive Expr where + | intLit : Int → Expr + deriving Repr, Inhabited + +abbrev Value := TsmLean.Core.Value + +inductive Eval : Expr → Value → Prop where + | intLit (n : Int) : Eval (.intLit n) (.vInt n) + +end TsmLean.Compile.Source