diff --git a/golang-lean/.gitignore b/golang-lean/.gitignore new file mode 100644 index 0000000..871d475 --- /dev/null +++ b/golang-lean/.gitignore @@ -0,0 +1,2 @@ +/.lake +/go-upstream diff --git a/golang-lean/CorpusCheck.lean b/golang-lean/CorpusCheck.lean new file mode 100644 index 0000000..591a214 --- /dev/null +++ b/golang-lean/CorpusCheck.lean @@ -0,0 +1,9 @@ +import GolangLean + +/-! Corpus driver — runs every `.go` script under `corpus/` and compares +the captured stdout to the matching `.expected` file. Mirrors octive-lean's +`CorpusCheck`. Currently a no-op until the interpreter exists. -/ + +def main (_args : List String) : IO UInt32 := do + IO.println "golang-lean corpus-check: no corpus yet (interpreter is a scaffold)" + return 0 diff --git a/golang-lean/GolangLean.lean b/golang-lean/GolangLean.lean new file mode 100644 index 0000000..cfb50fd --- /dev/null +++ b/golang-lean/GolangLean.lean @@ -0,0 +1,20 @@ +import GolangLean.Error +import GolangLean.Token +import GolangLean.AST +import GolangLean.Value +import GolangLean.Env +import GolangLean.Scanner +import GolangLean.Parser +import GolangLean.Eval +import GolangLean.Builtins +import GolangLean.REPL +import GolangLean.PureEval +import GolangLean.BigStep +import GolangLean.ValueEquiv +import GolangLean.Core.Syntax +import GolangLean.Core.Semantics +import GolangLean.Core.Determinism +import GolangLean.Core.Eval +import GolangLean.Core.Types +import GolangLean.Core.TypeSoundness +import GolangLean.Core.Preservation diff --git a/golang-lean/GolangLean/AST.lean b/golang-lean/GolangLean/AST.lean new file mode 100644 index 0000000..eb974d0 --- /dev/null +++ b/golang-lean/GolangLean/AST.lean @@ -0,0 +1,119 @@ +import GolangLean.Token + +namespace GolangLean + +/-! # Go AST — mirrors `go/ast/ast.go`. + +The Go reference parser produces nodes from three primary categories: +`Expr`, `Stmt`, `Decl`. We mirror that structure here. The point of mirroring +is that proofs and translations can refer to nodes by the same names that +Go programmers use, and so the eventual cross-language layer can compare +shapes apples-to-apples with octive-lean's `Expr/Stmt`. -/ + +/-! ## Operator categories (subsets of `Tok`) -/ + +inductive BinOp where + | add | sub | mul | quo | rem + | and | or | xor | shl | shr | andNot + | land | lor + | eql | neq | lss | leq | gtr | geq + deriving Repr, BEq, Inhabited + +inductive UnOp where + | plus | minus | not | xor | mul -- `*p` (deref) + | and -- `&v` (addr-of) + | arrow -- `<-ch` (recv) + deriving Repr, BEq, Inhabited + +/-! ## Channel direction (for `chan T`, `<-chan T`, `chan<- T`) -/ + +inductive ChanDir where + | send | recv | both + deriving Repr, BEq, Inhabited + +/-! ## AST nodes (mutually recursive: Expr ↔ Stmt ↔ Decl ↔ Type) -/ + +mutual + + /-- A Go expression. Types are also expressions in Go's grammar. -/ + inductive Expr where + | bad + | ident : String → Expr + | intLit : Int → Expr + | floatLit : Float → Expr + | stringLit : String → Expr + | charLit : Char → Expr + | imagLit : Float → Expr + | binOp : BinOp → Expr → Expr → Expr + | unOp : UnOp → Expr → Expr + | call : Expr → Array Expr → Bool → Expr -- f(args), variadic? + | selector : Expr → String → Expr -- x.Sel + | index : Expr → Expr → Expr -- x[i] + | slice : Expr → Option Expr → Option Expr → Option Expr → Expr -- x[lo:hi:max] + | typeAssert : Expr → Option Expr → Expr -- x.(T) (T = none for x.(type)) + | star : Expr → Expr -- *T or *p + | paren : Expr → Expr + | composite : Option Expr → Array Expr → Expr -- T{elts...} + | keyValue : Expr → Expr → Expr -- key: value (in composite) + | funcLit : FuncType → Array Stmt → Expr -- func(...) ... { ... } + | arrayType : Option Expr → Expr → Expr -- [N]T or [...]T (none = ...) + | sliceType : Expr → Expr -- []T + | mapType : Expr → Expr → Expr -- map[K]V + | chanType : ChanDir → Expr → Expr -- chan T / <-chan T / chan<- T + | structType : Array Field → Expr -- struct{ ... } + | interfaceType : Array Field → Expr -- interface{ ... } + | ellipsis : Option Expr → Expr -- ...T (variadic param / array len) + + /-- A struct field or method signature in an interface. -/ + inductive Field where + | mk : Array String → Expr → Option String → Field -- names, type, tag + + /-- A function-type expression: parameters and results. -/ + inductive FuncType where + | mk : Array Field → Array Field → FuncType -- params, results + + /-- A Go statement. -/ + inductive Stmt where + | bad + | declS : Decl → Stmt + | empty : Stmt + | labeled : String → Stmt → Stmt + | exprS : Expr → Stmt + | sendS : Expr → Expr → Stmt -- ch <- x + | incDec : Expr → Bool → Stmt -- x++ / x-- (true = inc) + | assignS : Array Expr → Tok → Array Expr → Stmt -- lhs op rhs (op ∈ {=, :=, +=, ...}) + | goS : Expr → Stmt -- go f(...) + | deferS : Expr → Stmt -- defer f(...) + | returnS : Array Expr → Stmt + | branchS : Tok → Option String → Stmt -- break/continue/goto/fallthrough + | block : Array Stmt → Stmt + | ifS : Option Stmt → Expr → Array Stmt → Option Stmt → Stmt + | switchS : Option Stmt → Option Expr → Array Stmt → Stmt + | typeSwitch : Option Stmt → Stmt → Array Stmt → Stmt + | caseClause : Array Expr → Array Stmt → Stmt + | selectS : Array Stmt → Stmt + | commClause : Option Stmt → Array Stmt → Stmt + | forS : Option Stmt → Option Expr → Option Stmt → Array Stmt → Stmt + | rangeS : Option Expr → Option Expr → Tok → Expr → Array Stmt → Stmt -- key,val := range x + + /-- A top-level or block-scoped declaration. -/ + inductive Decl where + | bad + | importD : Array (Option String × String) → Decl -- (alias, path) + | constD : Array (Array String × Option Expr × Array Expr) → Decl -- (names, type?, values) + | varD : Array (Array String × Option Expr × Array Expr) → Decl + | typeD : Array (String × Expr × Bool) → Decl -- (name, type, isAlias) + | funcD : Option Field → String → FuncType → Option (Array Stmt) → Decl + -- receiver?, name, signature, body? (none = forward decl in interface) + +end + +/-! ## File / Package -/ + +structure GoFile where + pkg : String + imports : Array (Option String × String) + decls : Array Decl + deriving Inhabited + +end GolangLean diff --git a/golang-lean/GolangLean/BigStep.lean b/golang-lean/GolangLean/BigStep.lean new file mode 100644 index 0000000..c5a132d --- /dev/null +++ b/golang-lean/GolangLean/BigStep.lean @@ -0,0 +1,21 @@ +import GolangLean.PureEval + +namespace GolangLean + +/-! # Big-step operational semantics. + +Mirror of octive-lean's `BigStep`. An inductive relation +`BigStepExpr : Env → Expr → Value → Env → Prop` that serves as the formal +specification of Go semantics, independent of the executable evaluator. + +This is the layer where cross-language proof becomes possible: theorems +stated about `BigStepExpr` for Go should look identical in shape to the +Octave version. The shared shape is the candidate for a language-generic +core. + +Stub. Constructors will be added one syntactic form at a time. -/ + +inductive BigStepExpr : Env → Expr → Value → Env → Prop where + | placeholder : BigStepExpr env e v env -- delete once real rules exist + +end GolangLean diff --git a/golang-lean/GolangLean/Builtins.lean b/golang-lean/GolangLean/Builtins.lean new file mode 100644 index 0000000..56fe2e2 --- /dev/null +++ b/golang-lean/GolangLean/Builtins.lean @@ -0,0 +1,18 @@ +import GolangLean.Value +import GolangLean.Env + +namespace GolangLean + +/-! # Built-in functions and the predeclared identifiers. + +Two layers in Go: + * Predeclared identifiers (`true`, `false`, `nil`, `iota`, basic types) — values. + * Built-in functions (`len`, `cap`, `make`, `new`, `append`, `copy`, `delete`, + `panic`, `recover`, `print`, `println`) — special forms in the spec. + * `fmt.*` and friends are *not* builtins; they live in package `fmt`. + +Stub for now. -/ + +def emptyBuiltinTable : List (String × (Array Value → IO (Except String Value))) := [] + +end GolangLean diff --git a/golang-lean/GolangLean/Core/Determinism.lean b/golang-lean/GolangLean/Core/Determinism.lean new file mode 100644 index 0000000..083ea24 --- /dev/null +++ b/golang-lean/GolangLean/Core/Determinism.lean @@ -0,0 +1,107 @@ +import GolangLean.Core.Semantics + +namespace GolangLean.Core + +/-! # Determinism of TGC big-step. + + `BigStep h env e v₁ h₁ → BigStep h env e v₂ h₂ → v₁ = v₂ ∧ h₁ = h₂` + +By induction on the first derivation, with case analysis on the second. +For each pair of constructors, either the term-shape forces them to agree +(so we apply the IHs to the sub-derivations) or, in the `ifte` case where +two rules share a term shape, an IH on the condition gives a contradictory +boolean. -/ + +theorem BigStep.deterministic + {h : Heap} {env : Env} {e : Term} {v₁ v₂ : Value} {h₁ h₂ : Heap} + (D₁ : BigStep h env e v₁ h₁) (D₂ : BigStep h env e v₂ h₂) : + v₁ = v₂ ∧ h₁ = h₂ := by + induction D₁ generalizing v₂ h₂ with + | unitR => + cases D₂; exact ⟨rfl, rfl⟩ + | intLitR n => + cases D₂; exact ⟨rfl, rfl⟩ + | boolLitR b => + cases D₂; exact ⟨rfl, rfl⟩ + | varR hLook => + cases D₂ with + | varR hLook' => + have heq := hLook.symm.trans hLook' + exact ⟨Option.some.inj heq, rfl⟩ + | lamR x body => + cases D₂; exact ⟨rfl, rfl⟩ + | appR _ _ _ ih1 ih2 ihb => + cases D₂ with + | appR D1' D2' Db' => + have ⟨hClos, hH1⟩ := ih1 D1' + injection hClos with hx hbody henv + subst hx; subst hbody; subst henv; subst hH1 + have ⟨hArg, hH2⟩ := ih2 D2' + subst hArg; subst hH2 + exact ihb Db' + | letInR _ _ ih1 ih2 => + cases D₂ with + | letInR D1' D2' => + have ⟨hv1, hH1⟩ := ih1 D1' + subst hv1; subst hH1 + exact ih2 D2' + | ifTR _ _ ihc iht => + cases D₂ with + | ifTR Dc' Dt' => + have ⟨_, hH1⟩ := ihc Dc' + subst hH1 + exact iht Dt' + | ifFR Dc' _ => + have ⟨hb, _⟩ := ihc Dc' + injection hb with hb_eq + exact Bool.noConfusion hb_eq + | ifFR _ _ ihc ihf => + cases D₂ with + | ifTR Dc' _ => + have ⟨hb, _⟩ := ihc Dc' + injection hb with hb_eq + exact Bool.noConfusion hb_eq + | ifFR Dc' Df' => + have ⟨_, hH1⟩ := ihc Dc' + subst hH1 + exact ihf Df' + | binopR _ _ Hop ih1 ih2 => + cases D₂ with + | binopR D1' D2' Hop' => + have ⟨hv1, hH1⟩ := ih1 D1' + subst hv1; subst hH1 + have ⟨hv2, hH2⟩ := ih2 D2' + subst hv2; subst hH2 + have heq := Hop.symm.trans Hop' + exact ⟨Option.some.inj heq, rfl⟩ + | refMkR _ ih => + cases D₂ with + | refMkR D' => + have ⟨hv, hH⟩ := ih D' + subst hv; subst hH + exact ⟨rfl, rfl⟩ + | derefR _ Hget ih => + cases D₂ with + | derefR D' Hget' => + have ⟨hloc, hH⟩ := ih D' + injection hloc with hloceq + subst hloceq; subst hH + have heq := Hget.symm.trans Hget' + exact ⟨Option.some.inj heq, rfl⟩ + | assignR _ _ _ ih1 ih2 => + cases D₂ with + | assignR D1' D2' _ => + have ⟨hloc, hH1⟩ := ih1 D1' + injection hloc with hloceq + subst hloceq; subst hH1 + have ⟨hv, hH2⟩ := ih2 D2' + subst hv; subst hH2 + exact ⟨rfl, rfl⟩ + | seqR _ _ ih1 ih2 => + cases D₂ with + | seqR D1' D2' => + have ⟨_, hH1⟩ := ih1 D1' + subst hH1 + exact ih2 D2' + +end GolangLean.Core diff --git a/golang-lean/GolangLean/Core/Eval.lean b/golang-lean/GolangLean/Core/Eval.lean new file mode 100644 index 0000000..5e9ec62 --- /dev/null +++ b/golang-lean/GolangLean/Core/Eval.lean @@ -0,0 +1,190 @@ +import GolangLean.Core.Semantics + +namespace GolangLean.Core + +/-! # Executable evaluator and soundness. + +A fuel-bounded recursive evaluator + `eval : Nat → Heap → Env → Term → Option (Value × Heap)` +together with the soundness theorem + `eval n h env e = some (v, h') → BigStep h env e v h'`. + +Soundness is the bridge that makes the inductive specification *runnable*. +For any closed TGC term you can write a Lean theorem of the form + `BigStep h env e v h'` +and prove it by `decide`-style execution: run `eval`, then apply +`eval_sound` to the equation. -/ + +def eval : Nat → Heap → Env → Term → Option (Value × Heap) + | 0, _, _, _ => none + | _ + 1, h, _, .unitT => some (.vUnit, h) + | _ + 1, h, _, .intLit k => some (.vInt k, h) + | _ + 1, h, _, .boolLit b => some (.vBool b, h) + | _ + 1, h, env, .var x => + match env.lookup x with + | some v => some (v, h) + | none => none + | _ + 1, h, env, .lam x body => some (.vClos x body env, h) + | n + 1, h, env, .app e1 e2 => + match eval n h env e1 with + | some (.vClos x body env', h1) => + match eval n h1 env e2 with + | some (v2, h2) => eval n h2 (env'.extend x v2) body + | none => none + | _ => none + | n + 1, h, env, .letIn x e1 e2 => + match eval n h env e1 with + | some (v1, h1) => eval n h1 (env.extend x v1) e2 + | none => none + | n + 1, h, env, .ifte ec e1 e2 => + match eval n h env ec with + | some (.vBool true, h1) => eval n h1 env e1 + | some (.vBool false, h1) => eval n h1 env e2 + | _ => none + | n + 1, h, env, .binop op e1 e2 => + match eval n h env e1 with + | some (v1, h1) => + match eval n h1 env e2 with + | some (v2, h2) => + match op.apply v1 v2 with + | some v => some (v, h2) + | none => none + | none => none + | none => none + | n + 1, h, env, .refMk e => + match eval n h env e with + | some (v, h1) => some (.vLoc h1.size, h1.push v) + | none => none + | n + 1, h, env, .deref e => + match eval n h env e with + | some (.vLoc loc, h1) => + match h1[loc]? with + | some v => some (v, h1) + | none => none + | _ => none + | n + 1, h, env, .assign e1 e2 => + match eval n h env e1 with + | some (.vLoc loc, h1) => + match eval n h1 env e2 with + | some (v2, h2) => + if loc < h2.size then some (.vUnit, h2.set! loc v2) else none + | none => none + | _ => none + | n + 1, h, env, .seq e1 e2 => + match eval n h env e1 with + | some (_, h1) => eval n h1 env e2 + | none => none + +theorem eval_sound : + ∀ (n : Nat) (h : Heap) (env : Env) (e : Term) (v : Value) (h' : Heap), + eval n h env e = some (v, h') → BigStep h env e v h' := by + intro n + induction n with + | zero => + intro h env e v h' heq + simp [eval] at heq + | succ n ih => + intro h env e v h' heq + cases e with + | unitT => + simp [eval] at heq + obtain ⟨rfl, rfl⟩ := heq + exact .unitR + | intLit k => + simp [eval] at heq + obtain ⟨rfl, rfl⟩ := heq + exact .intLitR k + | boolLit b => + simp [eval] at heq + obtain ⟨rfl, rfl⟩ := heq + exact .boolLitR b + | var x => + simp only [eval] at heq + split at heq + next vv hL => + simp at heq + obtain ⟨rfl, rfl⟩ := heq + exact .varR hL + next => simp at heq + | lam x body => + simp [eval] at heq + obtain ⟨rfl, rfl⟩ := heq + exact .lamR x body + | app e1 e2 => + simp only [eval] at heq + split at heq + next x body env' h1 heq1 => + split at heq + next v2 h2 heq2 => + exact .appR (ih _ _ _ _ _ heq1) (ih _ _ _ _ _ heq2) (ih _ _ _ _ _ heq) + next => simp at heq + all_goals simp at heq + | letIn x e1 e2 => + simp only [eval] at heq + split at heq + next v1 h1 heq1 => + exact .letInR (ih _ _ _ _ _ heq1) (ih _ _ _ _ _ heq) + next => simp at heq + | ifte ec e1 e2 => + simp only [eval] at heq + split at heq + next h1 heq1 => + exact .ifTR (ih _ _ _ _ _ heq1) (ih _ _ _ _ _ heq) + next h1 heq1 => + exact .ifFR (ih _ _ _ _ _ heq1) (ih _ _ _ _ _ heq) + all_goals simp at heq + | binop op e1 e2 => + simp only [eval] at heq + split at heq + next v1 h1 heq1 => + split at heq + next v2 h2 heq2 => + split at heq + next vv heqop => + simp at heq + obtain ⟨rfl, rfl⟩ := heq + exact .binopR (ih _ _ _ _ _ heq1) (ih _ _ _ _ _ heq2) heqop + next => simp at heq + next => simp at heq + next => simp at heq + | refMk e => + simp only [eval] at heq + split at heq + next vv h1 heq1 => + simp at heq + obtain ⟨rfl, rfl⟩ := heq + exact .refMkR (ih _ _ _ _ _ heq1) + next => simp at heq + | deref e => + simp only [eval] at heq + split at heq + next loc h1 heq1 => + split at heq + next vv hget => + simp at heq + obtain ⟨rfl, rfl⟩ := heq + exact .derefR (ih _ _ _ _ _ heq1) hget + next => simp at heq + all_goals simp at heq + | assign e1 e2 => + simp only [eval] at heq + split at heq + next loc h1 heq1 => + split at heq + next v2 h2 heq2 => + split at heq + next hb => + simp at heq + obtain ⟨rfl, rfl⟩ := heq + exact .assignR (ih _ _ _ _ _ heq1) (ih _ _ _ _ _ heq2) hb + next => simp at heq + next => simp at heq + all_goals simp at heq + | seq e1 e2 => + simp only [eval] at heq + split at heq + next _ h1 heq1 => + exact .seqR (ih _ _ _ _ _ heq1) (ih _ _ _ _ _ heq) + next => simp at heq + +end GolangLean.Core diff --git a/golang-lean/GolangLean/Core/Preservation.lean b/golang-lean/GolangLean/Core/Preservation.lean new file mode 100644 index 0000000..de6d4bf --- /dev/null +++ b/golang-lean/GolangLean/Core/Preservation.lean @@ -0,0 +1,218 @@ +import GolangLean.Core.TypeSoundness + +namespace GolangLean.Core + +/-! # Preservation theorem for big-step semantics. + +`Γ ⊢ e : T ∧ HasTypeH ht h ∧ HasTypeEnv ht env Γ ∧ BigStep h env e v h' + ⟹ ∃ ht', HeapTy.extends ht ht' ∧ HasTypeH ht' h' ∧ HasTypeV ht' v T ∧ HasTypeEnv ht' env Γ` + +By induction on the big-step derivation, with case-splits on the typing +hypothesis. Each rule case threads heap-typings through its sub-derivations +and weakens earlier results across the extension. + +Big-step preservation has no progress analogue — the theorem only speaks +about derivations that exist. -/ + +/-! ## Heap extension by push. -/ + +theorem HeapTy.extends_push (ht : HeapTy) (T : Ty) : ht.extends (ht.push T) := by + refine ⟨by simp [Array.size_push], ?_⟩ + intros i T' hL + rw [Array.getElem?_push] + by_cases heq : i = ht.size + · subst heq + have hnone : ht[ht.size]? = none := Array.getElem?_eq_none (Nat.le_refl _) + rw [hnone] at hL + contradiction + · simp [heq]; exact hL + +/-! ## Heap-conformance preservation. -/ + +theorem HasTypeH.push + {ht : HeapTy} {h : Heap} {v : Value} {T : Ty} + (hH : HasTypeH ht h) (hV : HasTypeV ht v T) : + HasTypeH (ht.push T) (h.push v) := by + refine ⟨by simp [Array.size_push, hH.1], ?_⟩ + intros i T' hL + rw [Array.getElem?_push] at hL + by_cases heq : i = ht.size + · subst heq + simp at hL + subst hL + refine ⟨v, ?_, hV.weaken (HeapTy.extends_push ht T)⟩ + rw [Array.getElem?_push] + simp [hH.1] + · simp [heq] at hL + have ⟨v', hgv, hVT⟩ := hH.2 i T' hL + refine ⟨v', ?_, hVT.weaken (HeapTy.extends_push ht T)⟩ + rw [Array.getElem?_push] + have hisize : i ≠ h.size := hH.1 ▸ heq + simp [hisize, hgv] + +theorem HasTypeH.setIfInBounds + {ht : HeapTy} {h : Heap} {loc : Nat} {v : Value} {T : Ty} + (hH : HasTypeH ht h) (hV : HasTypeV ht v T) + (hLoc : (ht[loc]? : Option Ty) = some T) : + HasTypeH ht (h.setIfInBounds loc v) := by + refine ⟨by rw [Array.size_setIfInBounds]; exact hH.1, ?_⟩ + intros i T' hL + by_cases hi : loc = i + · subst hi + rw [hL] at hLoc + simp at hLoc + subst hLoc + have hloc_lt : loc < h.size := by + rw [← hH.1] + rcases Nat.lt_or_ge loc ht.size with hlt | hge + · exact hlt + · rw [Array.getElem?_eq_none hge] at hL + contradiction + have heq : (h.setIfInBounds loc v)[loc]? = some v := by + rw [Array.getElem?_setIfInBounds]; simp [hloc_lt] + exact ⟨v, heq, hV⟩ + · have ⟨v', hgv, hVT⟩ := hH.2 i T' hL + have heq : (h.setIfInBounds loc v)[i]? = some v' := by + rw [Array.getElem?_setIfInBounds, if_neg hi]; exact hgv + exact ⟨v', heq, hVT⟩ + +/-! ## Inversion for binop typing. -/ + +theorem binop_apply_sound + {ht : HeapTy} {op : BinOp} {v1 v2 v : Value} {T1 T2 T : Ty} + (hOp : op.typeOf T1 T2 = some T) + (hV1 : HasTypeV ht v1 T1) (hV2 : HasTypeV ht v2 T2) + (hApp : op.apply v1 v2 = some v) : + HasTypeV ht v T := by + cases op <;> cases T1 <;> cases T2 <;> simp [BinOp.typeOf] at hOp <;> + (try (cases hOp; cases hV1; cases hV2; simp [BinOp.apply] at hApp; cases hApp; constructor)) + +/-! ## Preservation. -/ + +theorem preservation : + ∀ {h env e v h'} (D : BigStep h env e v h') + {ht Γ T} (hT : HasType Γ e T) + (hH : HasTypeH ht h) (hE : HasTypeEnv ht env Γ), + ∃ ht', HeapTy.extends ht ht' ∧ HasTypeH ht' h' ∧ + HasTypeV ht' v T ∧ HasTypeEnv ht' env Γ := by + intros h env e v h' D + induction D with + | unitR => + intros ht Γ T hT hH hE + cases hT + exact ⟨ht, HeapTy.extends_refl _, hH, .vUnit, hE⟩ + | intLitR n => + intros ht Γ T hT hH hE + cases hT + exact ⟨ht, HeapTy.extends_refl _, hH, .vInt n, hE⟩ + | boolLitR b => + intros ht Γ T hT hH hE + cases hT + exact ⟨ht, HeapTy.extends_refl _, hH, .vBool b, hE⟩ + | varR hLook => + intros ht Γ T hT hH hE + cases hT with + | var hLookT => + have ⟨v', hLook', hTV⟩ := hE.lookup_correspondence hLookT + rw [hLook] at hLook' + cases hLook' + exact ⟨ht, HeapTy.extends_refl _, hH, hTV, hE⟩ + | lamR x body => + intros ht Γ T hT hH hE + cases hT with + | lam hBody => exact ⟨ht, HeapTy.extends_refl _, hH, .vClos hE hBody, hE⟩ + | appR _ _ _ ih1 ih2 ihb => + intros ht Γ T hT hH hE + cases hT with + | app hT1 hT2 => + have ⟨ht1, ext01, hH1, hClosT, hE1⟩ := ih1 hT1 hH hE + cases hClosT with + | vClos hEnv' hBody => + have ⟨ht2, ext12, hH2, hArgT, _⟩ := ih2 hT2 hH1 hE1 + have hEnv2 := hEnv'.weaken ext12 + have ⟨ht3, ext23, hH3, hValT, _⟩ := + ihb hBody hH2 (HasTypeEnv.cons hArgT hEnv2) + let totalExt := HeapTy.extends_trans (HeapTy.extends_trans ext01 ext12) ext23 + exact ⟨ht3, totalExt, hH3, hValT, hE.weaken totalExt⟩ + | letInR _ _ ih1 ih2 => + intros ht Γ T hT hH hE + cases hT with + | letIn hT1 hT2 => + have ⟨ht1, ext01, hH1, hV1, hE1⟩ := ih1 hT1 hH hE + have ⟨ht2, ext12, hH2, hV2, _⟩ := + ih2 hT2 hH1 (HasTypeEnv.cons hV1 hE1) + let totalExt := HeapTy.extends_trans ext01 ext12 + exact ⟨ht2, totalExt, hH2, hV2, hE.weaken totalExt⟩ + | ifTR _ _ ihc iht => + intros ht Γ T hT hH hE + cases hT with + | ifte hTc hT1 _ => + have ⟨ht1, ext01, hH1, _, hE1⟩ := ihc hTc hH hE + have ⟨ht2, ext12, hH2, hV2, _⟩ := iht hT1 hH1 hE1 + let totalExt := HeapTy.extends_trans ext01 ext12 + exact ⟨ht2, totalExt, hH2, hV2, hE.weaken totalExt⟩ + | ifFR _ _ ihc ihf => + intros ht Γ T hT hH hE + cases hT with + | ifte hTc _ hT2 => + have ⟨ht1, ext01, hH1, _, hE1⟩ := ihc hTc hH hE + have ⟨ht2, ext12, hH2, hV2, _⟩ := ihf hT2 hH1 hE1 + let totalExt := HeapTy.extends_trans ext01 ext12 + exact ⟨ht2, totalExt, hH2, hV2, hE.weaken totalExt⟩ + | binopR _ _ hOp ih1 ih2 => + intros ht Γ T hT hH hE + cases hT with + | binop hT1 hT2 hOpT => + have ⟨ht1, ext01, hH1, hV1, hE1⟩ := ih1 hT1 hH hE + have ⟨ht2, ext12, hH2, hV2, _⟩ := ih2 hT2 hH1 hE1 + let totalExt := HeapTy.extends_trans ext01 ext12 + have hV1ext := hV1.weaken ext12 + have hVT := binop_apply_sound hOpT hV1ext hV2 hOp + exact ⟨ht2, totalExt, hH2, hVT, hE.weaken totalExt⟩ + | refMkR _ ih => + intros ht Γ T hT hH hE + cases hT with + | @refMk _ _ T_inner hT1 => + have ⟨ht1, ext01, hH1, hV, _⟩ := ih hT1 hH hE + have extPush := HeapTy.extends_push ht1 T_inner + have newExt := HeapTy.extends_trans ext01 extPush + have hHnew := hH1.push hV + have hVnew : HasTypeV (ht1.push T_inner) (.vLoc ht1.size) (.ref T_inner) := by + apply HasTypeV.vLoc + rw [Array.getElem?_push] + simp + rw [hH1.1] at hVnew + exact ⟨ht1.push T_inner, newExt, hHnew, hVnew, hE.weaken newExt⟩ + | derefR _ Hget ih => + intros ht Γ T hT hH hE + cases hT with + | @deref _ _ T_inner hT1 => + have ⟨ht1, ext01, hH1, hVLoc, _⟩ := ih hT1 hH hE + cases hVLoc with + | vLoc hLocT => + have ⟨v', hgv, hVT⟩ := hH1.2 _ _ hLocT + rw [Hget] at hgv + cases hgv + exact ⟨ht1, ext01, hH1, hVT, hE.weaken ext01⟩ + | assignR _ _ Hin ih1 ih2 => + intros ht Γ T hT hH hE + cases hT with + | @assign _ _ _ T_target hT1 hT2 => + have ⟨ht1, ext01, hH1, hVLoc, hE1⟩ := ih1 hT1 hH hE + have ⟨ht2, ext12, hH2, hV2, _⟩ := ih2 hT2 hH1 hE1 + cases hVLoc with + | vLoc hLocT => + have hLocT2 := ext12.2 _ _ hLocT + have hH2new := hH2.setIfInBounds hV2 hLocT2 + have totalExt := HeapTy.extends_trans ext01 ext12 + exact ⟨ht2, totalExt, hH2new, .vUnit, hE.weaken totalExt⟩ + | seqR _ _ ih1 ih2 => + intros ht Γ T hT hH hE + cases hT with + | seq hT1 hT2 => + have ⟨ht1, ext01, hH1, _, hE1⟩ := ih1 hT1 hH hE + have ⟨ht2, ext12, hH2, hV2, _⟩ := ih2 hT2 hH1 hE1 + let totalExt := HeapTy.extends_trans ext01 ext12 + exact ⟨ht2, totalExt, hH2, hV2, hE.weaken totalExt⟩ + +end GolangLean.Core diff --git a/golang-lean/GolangLean/Core/Semantics.lean b/golang-lean/GolangLean/Core/Semantics.lean new file mode 100644 index 0000000..ac71647 --- /dev/null +++ b/golang-lean/GolangLean/Core/Semantics.lean @@ -0,0 +1,130 @@ +import GolangLean.Core.Syntax + +namespace GolangLean.Core + +/-! # Big-step operational semantics for Tiny Go Core. + +State is `Heap × Env`. The heap is an `Array Value`; references are +indices. Env is a list of (name, value) bindings — innermost shadowing +outermost. Closures capture the env at the point of `lam`-evaluation +(lexical scope, as in Go). + +The relation is + + BigStep h env e v h' + +read "starting from heap `h` and env `env`, term `e` evaluates to value +`v` and produces heap `h'`". One inductive constructor per syntactic +form. Every premise either appeals to `BigStep` recursively or to a +total side-condition (lookup, binop application, bounds). -/ + +mutual + + inductive Value where + | vUnit : Value + | vInt : Int → Value + | vBool : Bool → Value + | vClos : String → Term → EnvList → Value + | vLoc : Nat → Value + + inductive EnvList where + | nil : EnvList + | cons : String → Value → EnvList → EnvList + +end + +mutual + partial def Value.repr : Value → Std.Format + | .vUnit => "vUnit" + | .vInt n => f!"vInt {n}" + | .vBool b => f!"vBool {b}" + | .vClos x _ _ => f!"" + | .vLoc n => f!"vLoc {n}" + partial def EnvList.repr : EnvList → Std.Format + | .nil => "[]" + | .cons k v r => f!"{k}↦{Value.repr v} :: {EnvList.repr r}" +end + +instance : Repr Value where + reprPrec v _ := Value.repr v +instance : Repr EnvList where + reprPrec env _ := EnvList.repr env + +abbrev Env := EnvList +abbrev Heap := Array Value + +namespace EnvList + +def lookup : EnvList → String → Option Value + | .nil, _ => none + | .cons k v r, x => if k = x then some v else lookup r x + +def extend (env : EnvList) (x : String) (v : Value) : EnvList := + .cons x v env + +end EnvList + +namespace BinOp + +def apply : BinOp → Value → Value → Option Value + | .add, .vInt a, .vInt b => some (.vInt (a + b)) + | .sub, .vInt a, .vInt b => some (.vInt (a - b)) + | .mul, .vInt a, .vInt b => some (.vInt (a * b)) + | .eq, .vInt a, .vInt b => some (.vBool (a == b)) + | .eq, .vBool a, .vBool b => some (.vBool (a == b)) + | .lt, .vInt a, .vInt b => some (.vBool (a < b)) + | _, _, _ => none + +end BinOp + +inductive BigStep : Heap → Env → Term → Value → Heap → Prop where + | unitR {h env} : + BigStep h env .unitT .vUnit h + | intLitR {h env} (n : Int) : + BigStep h env (.intLit n) (.vInt n) h + | boolLitR {h env} (b : Bool) : + BigStep h env (.boolLit b) (.vBool b) h + | varR {h env x v} (hLook : env.lookup x = some v) : + BigStep h env (.var x) v h + | lamR {h env} (x : String) (body : Term) : + BigStep h env (.lam x body) (.vClos x body env) h + | appR {h env e1 e2 x body env' v_arg v h1 h2 h3} + (D1 : BigStep h env e1 (.vClos x body env') h1) + (D2 : BigStep h1 env e2 v_arg h2) + (Db : BigStep h2 (env'.extend x v_arg) body v h3) : + BigStep h env (.app e1 e2) v h3 + | letInR {h env x e1 e2 v1 v2 h1 h2} + (D1 : BigStep h env e1 v1 h1) + (D2 : BigStep h1 (env.extend x v1) e2 v2 h2) : + BigStep h env (.letIn x e1 e2) v2 h2 + | ifTR {h env e e1 e2 v h1 h2} + (Dc : BigStep h env e (.vBool true) h1) + (Dt : BigStep h1 env e1 v h2) : + BigStep h env (.ifte e e1 e2) v h2 + | ifFR {h env e e1 e2 v h1 h2} + (Dc : BigStep h env e (.vBool false) h1) + (Df : BigStep h1 env e2 v h2) : + BigStep h env (.ifte e e1 e2) v h2 + | binopR {h env op e1 e2 v1 v2 v h1 h2} + (D1 : BigStep h env e1 v1 h1) + (D2 : BigStep h1 env e2 v2 h2) + (Hop : op.apply v1 v2 = some v) : + BigStep h env (.binop op e1 e2) v h2 + | refMkR {h env e v h1} + (D : BigStep h env e v h1) : + BigStep h env (.refMk e) (.vLoc h1.size) (h1.push v) + | derefR {h env e loc v h1} + (D : BigStep h env e (.vLoc loc) h1) + (Hget : h1[loc]? = some v) : + BigStep h env (.deref e) v h1 + | assignR {h env e1 e2 loc v h1 h2} + (D1 : BigStep h env e1 (.vLoc loc) h1) + (D2 : BigStep h1 env e2 v h2) + (Hin : loc < h2.size) : + BigStep h env (.assign e1 e2) .vUnit (h2.set! loc v) + | seqR {h env e1 e2 v1 v2 h1 h2} + (D1 : BigStep h env e1 v1 h1) + (D2 : BigStep h1 env e2 v2 h2) : + BigStep h env (.seq e1 e2) v2 h2 + +end GolangLean.Core diff --git a/golang-lean/GolangLean/Core/Syntax.lean b/golang-lean/GolangLean/Core/Syntax.lean new file mode 100644 index 0000000..b78dcf8 --- /dev/null +++ b/golang-lean/GolangLean/Core/Syntax.lean @@ -0,0 +1,36 @@ +namespace GolangLean.Core + +/-! # Tiny Go Core (TGC) — abstract syntax. + +A small calculus that captures the *essential computation* of Go's +sequential fragment: variables, functions, mutable references (Go's `&`/`*`), +sequencing, and conditionals. Everything in surface Go is intended to +desugar into this kernel. + +What is *not* here: types (introduced in a later module), goroutines, +channels, structs, slices, methods, interfaces. The point of starting tiny +is that every theorem we prove about TGC carries over once those +extensions are added compositionally. -/ + +inductive BinOp where + | add | sub | mul + | eq | lt + deriving Repr, BEq, DecidableEq, Inhabited + +inductive Term where + | unitT : Term + | intLit : Int → Term + | boolLit : Bool → Term + | var : String → Term + | lam : String → Term → Term -- λ x. e + | app : Term → Term → Term + | letIn : String → Term → Term → Term -- let x = e₁ in e₂ + | ifte : Term → Term → Term → Term + | binop : BinOp → Term → Term → Term + | refMk : Term → Term -- &e (allocate) + | deref : Term → Term -- *e (read) + | assign : Term → Term → Term -- *p = e + | seq : Term → Term → Term + deriving Repr, Inhabited + +end GolangLean.Core diff --git a/golang-lean/GolangLean/Core/TypeSoundness.lean b/golang-lean/GolangLean/Core/TypeSoundness.lean new file mode 100644 index 0000000..ce6da0c --- /dev/null +++ b/golang-lean/GolangLean/Core/TypeSoundness.lean @@ -0,0 +1,101 @@ +import GolangLean.Core.Types +import GolangLean.Core.Semantics + +namespace GolangLean.Core + +/-! # Type soundness for TGC. + +Big-step preservation: if `Γ ⊢ e : T` and `BigStep h env e v h'` under +compatible heap/env typings, then there exists an extended heap-typing +under which `v : T` and the new heap is well-typed. There is no +small-step "progress" analogue — big-step proves "if it terminates, the +result has the expected type". -/ + +abbrev HeapTy := Array Ty + +mutual + + inductive HasTypeV : HeapTy → Value → Ty → Prop where + | vUnit {ht} : HasTypeV ht .vUnit .unit + | vInt {ht} (n : Int) : HasTypeV ht (.vInt n) .int + | vBool {ht} (b : Bool) : HasTypeV ht (.vBool b) .bool + | vLoc {ht loc T} (h : (ht[loc]? : Option Ty) = some T) : + HasTypeV ht (.vLoc loc) (.ref T) + | vClos {ht x body env Γ T_arg T_ret} + (he : HasTypeEnv ht env Γ) + (hb : HasType (Γ.extend x T_arg) body T_ret) : + HasTypeV ht (.vClos x body env) (.arrow T_arg T_ret) + + inductive HasTypeEnv : HeapTy → Env → TyEnv → Prop where + | nil {ht} : HasTypeEnv ht .nil [] + | cons {ht x v T env Γ} + (hv : HasTypeV ht v T) + (he : HasTypeEnv ht env Γ) : + HasTypeEnv ht (.cons x v env) ((x, T) :: Γ) + +end + +def HasTypeH (ht : HeapTy) (h : Heap) : Prop := + ht.size = h.size ∧ + ∀ (i : Nat) (T : Ty), (ht[i]? : Option Ty) = some T → + ∃ v, (h[i]? : Option Value) = some v ∧ HasTypeV ht v T + +def HeapTy.extends (ht ht' : HeapTy) : Prop := + ht.size ≤ ht'.size ∧ + ∀ (i : Nat) (T : Ty), (ht[i]? : Option Ty) = some T → (ht'[i]? : Option Ty) = some T + +namespace HeapTy + +theorem extends_refl (ht : HeapTy) : ht.extends ht := + ⟨Nat.le_refl _, fun _ _ h => h⟩ + +theorem extends_trans {ht₁ ht₂ ht₃ : HeapTy} + (h₁ : ht₁.extends ht₂) (h₂ : ht₂.extends ht₃) : ht₁.extends ht₃ := + ⟨Nat.le_trans h₁.1 h₂.1, fun i T h => h₂.2 i T (h₁.2 i T h)⟩ + +end HeapTy + +/-! ## Weakening under heap-typing extension. -/ + +mutual + + theorem HasTypeV.weaken {ht ht' v T} + (h : HasTypeV ht v T) (ext : HeapTy.extends ht ht') : + HasTypeV ht' v T := by + cases h with + | vUnit => exact .vUnit + | vInt n => exact .vInt n + | vBool b => exact .vBool b + | vLoc hLoc => exact .vLoc (ext.2 _ _ hLoc) + | vClos he hb => exact .vClos (HasTypeEnv.weaken he ext) hb + + theorem HasTypeEnv.weaken {ht ht' env Γ} + (h : HasTypeEnv ht env Γ) (ext : HeapTy.extends ht ht') : + HasTypeEnv ht' env Γ := by + cases h with + | nil => exact .nil + | cons hv he => exact .cons (HasTypeV.weaken hv ext) (HasTypeEnv.weaken he ext) + +end + +/-! ## Lookup correspondence: well-typed env produces well-typed values. -/ + +theorem HasTypeEnv.lookup_correspondence : + ∀ {ht env Γ x T}, + HasTypeEnv ht env Γ → Γ.lookup x = some T → + ∃ v, env.lookup x = some v ∧ HasTypeV ht v T + | _, _, _, _, _, .nil, hL => by simp [TyEnv.lookup] at hL + | _, _, _, x, _, @HasTypeEnv.cons _ x' v' T' env' Γ' hv he, hL => by + by_cases hx : x' = x + · subst hx + simp only [TyEnv.lookup] at hL + simp at hL + cases hL + refine ⟨v', ?_, hv⟩ + simp [EnvList.lookup] + · simp only [TyEnv.lookup, if_neg hx] at hL + have ⟨v, hLv, hTv⟩ := lookup_correspondence he hL + refine ⟨v, ?_, hTv⟩ + simp [EnvList.lookup, hx, hLv] + +end GolangLean.Core diff --git a/golang-lean/GolangLean/Core/Types.lean b/golang-lean/GolangLean/Core/Types.lean new file mode 100644 index 0000000..dea76bb --- /dev/null +++ b/golang-lean/GolangLean/Core/Types.lean @@ -0,0 +1,90 @@ +import GolangLean.Core.Syntax + +namespace GolangLean.Core + +/-! # Static type system for TGC. + +Five base types: `unit`, `int`, `bool`, `arrow τ₁ τ₂`, `ref τ`. Typing +context `TyEnv` is a list of (name, type) bindings — innermost-first to +match `Env`. + +Typing rules `HasType : TyEnv → Term → Ty → Prop` follow the standard +simply-typed lambda calculus + ML-style references discipline. -/ + +inductive Ty where + | unit : Ty + | int : Ty + | bool : Ty + | arrow : Ty → Ty → Ty + | ref : Ty → Ty + deriving Repr, BEq, DecidableEq, Inhabited + +abbrev TyEnv := List (String × Ty) + +namespace TyEnv + +def lookup : TyEnv → String → Option Ty + | [], _ => none + | (k, T) :: rs, x => if k = x then some T else lookup rs x + +def extend (Γ : TyEnv) (x : String) (T : Ty) : TyEnv := + (x, T) :: Γ + +end TyEnv + +namespace BinOp + +def typeOf : BinOp → Ty → Ty → Option Ty + | .add, .int, .int => some .int + | .sub, .int, .int => some .int + | .mul, .int, .int => some .int + | .eq, .int, .int => some .bool + | .eq, .bool, .bool => some .bool + | .lt, .int, .int => some .bool + | _, _, _ => none + +end BinOp + +inductive HasType : TyEnv → Term → Ty → Prop where + | unitT {Γ} : HasType Γ .unitT .unit + | intLit {Γ} (n : Int) : HasType Γ (.intLit n) .int + | boolLit {Γ} (b : Bool) : HasType Γ (.boolLit b) .bool + | var {Γ x T} (h : Γ.lookup x = some T) : + HasType Γ (.var x) T + | lam {Γ x body T_arg T_ret} + (h : HasType (Γ.extend x T_arg) body T_ret) : + HasType Γ (.lam x body) (.arrow T_arg T_ret) + | app {Γ e1 e2 T_arg T_ret} + (h1 : HasType Γ e1 (.arrow T_arg T_ret)) + (h2 : HasType Γ e2 T_arg) : + HasType Γ (.app e1 e2) T_ret + | letIn {Γ x e1 e2 T1 T2} + (h1 : HasType Γ e1 T1) + (h2 : HasType (Γ.extend x T1) e2 T2) : + HasType Γ (.letIn x e1 e2) T2 + | ifte {Γ ec e1 e2 T} + (hc : HasType Γ ec .bool) + (h1 : HasType Γ e1 T) + (h2 : HasType Γ e2 T) : + HasType Γ (.ifte ec e1 e2) T + | binop {Γ op e1 e2 T1 T2 T} + (h1 : HasType Γ e1 T1) + (h2 : HasType Γ e2 T2) + (hOp : op.typeOf T1 T2 = some T) : + HasType Γ (.binop op e1 e2) T + | refMk {Γ e T} + (h : HasType Γ e T) : + HasType Γ (.refMk e) (.ref T) + | deref {Γ e T} + (h : HasType Γ e (.ref T)) : + HasType Γ (.deref e) T + | assign {Γ e1 e2 T} + (h1 : HasType Γ e1 (.ref T)) + (h2 : HasType Γ e2 T) : + HasType Γ (.assign e1 e2) .unit + | seq {Γ e1 e2 T1 T2} + (h1 : HasType Γ e1 T1) + (h2 : HasType Γ e2 T2) : + HasType Γ (.seq e1 e2) T2 + +end GolangLean.Core diff --git a/golang-lean/GolangLean/Env.lean b/golang-lean/GolangLean/Env.lean new file mode 100644 index 0000000..fb0d051 --- /dev/null +++ b/golang-lean/GolangLean/Env.lean @@ -0,0 +1,37 @@ +import GolangLean.Value + +namespace GolangLean + +/-! # Environment / heap. + +Distinct from octive-lean's flat `Env`: Go has lexically-nested scopes, +addressable variables (so a `&x` produces a stable heap index), and a +package-level namespace. This is a placeholder shape; flesh out as needed. -/ + +abbrev Frame := List (String × Value) + +structure Env where + frames : List Frame -- top of list = innermost scope + package : List (String × Value) -- package-level decls + heap : Array Value -- addressable cells (`&x` returns an index) + builtins : List (String × (Array Value → IO (Except String Value))) + deriving Inhabited + +namespace Env + +def empty : Env := + { frames := [[]], package := [], heap := #[], builtins := [] } + +/-- Look up a name: innermost frame first, then package, then builtins (later). -/ +def lookup (env : Env) (name : String) : Option Value := + let rec go : List Frame → Option Value + | [] => env.package.lookup name + | f :: fs => + match f.lookup name with + | some v => some v + | none => go fs + go env.frames + +end Env + +end GolangLean diff --git a/golang-lean/GolangLean/Error.lean b/golang-lean/GolangLean/Error.lean new file mode 100644 index 0000000..ba02c2c --- /dev/null +++ b/golang-lean/GolangLean/Error.lean @@ -0,0 +1,27 @@ +namespace GolangLean + +/-- Runtime / static errors produced anywhere in the Go toolchain. -/ +inductive GoError where + | scan (msg : String) (line col : Nat) -- lexical + | parse (msg : String) (line col : Nat) -- syntactic + | typeErr (msg : String) -- static type-check + | runtime (msg : String) -- panic / runtime + | nilDeref -- nil-pointer dereference + | indexOOB (i n : Int) -- index out of range + | divByZero + | notImpl (what : String) -- placeholder for stubs + deriving Repr, Inhabited + +def GoError.toString : GoError → String + | .scan m l c => s!"scan error: {m} ({l}:{c})" + | .parse m l c => s!"parse error: {m} ({l}:{c})" + | .typeErr m => s!"type error: {m}" + | .runtime m => s!"runtime error: {m}" + | .nilDeref => "runtime error: nil pointer dereference" + | .indexOOB i n => s!"runtime error: index out of range [{i}] with length {n}" + | .divByZero => "runtime error: integer divide by zero" + | .notImpl what => s!"not implemented: {what}" + +instance : ToString GoError := ⟨GoError.toString⟩ + +end GolangLean diff --git a/golang-lean/GolangLean/Eval.lean b/golang-lean/GolangLean/Eval.lean new file mode 100644 index 0000000..4cf36d6 --- /dev/null +++ b/golang-lean/GolangLean/Eval.lean @@ -0,0 +1,25 @@ +import GolangLean.AST +import GolangLean.Env +import GolangLean.Error + +namespace GolangLean + +/-! # Evaluator — operational big-step interpreter over the AST. + +Mirror of octive-lean's `Eval`. Monad stack `ExceptT GoError (StateT Env IO)`, +chosen for the same reason: state survives non-local control flow (defer, +panic, goroutine spawn) carried as exceptions. -/ + +abbrev EvalM := ExceptT GoError (StateT Env IO) + +partial def evalExpr (_e : Expr) : EvalM Value := + throw (.notImpl "Eval.evalExpr") + +partial def evalStmt (_s : Stmt) : EvalM Unit := + throw (.notImpl "Eval.evalStmt") + +def runFile (path : String) : IO UInt32 := do + IO.eprintln s!"golang-lean: would execute {path} (interpreter not yet implemented)" + return 1 + +end GolangLean diff --git a/golang-lean/GolangLean/Parser.lean b/golang-lean/GolangLean/Parser.lean new file mode 100644 index 0000000..7c57d11 --- /dev/null +++ b/golang-lean/GolangLean/Parser.lean @@ -0,0 +1,16 @@ +import GolangLean.Scanner +import GolangLean.AST + +namespace GolangLean + +/-! # Parser — port of `go/parser/parser.go`. + +Stub. Recursive-descent over the token stream. We mirror upstream: each +non-terminal in the Go spec corresponds to one `parseX` function, named +identically. -/ + +partial def parseFile (src : String) : Except GoError GoFile := do + let _toks ← scan src + .error (.notImpl "Parser.parseFile: see go-upstream/src/go/parser/parser.go") + +end GolangLean diff --git a/golang-lean/GolangLean/PureEval.lean b/golang-lean/GolangLean/PureEval.lean new file mode 100644 index 0000000..268d343 --- /dev/null +++ b/golang-lean/GolangLean/PureEval.lean @@ -0,0 +1,18 @@ +import GolangLean.AST +import GolangLean.Value +import GolangLean.Env + +namespace GolangLean + +/-! # Pure (non-IO) evaluator. + +Mirror of octive-lean's `PureEval`. Strips IO so that the evaluator becomes +a `def` (not `partial def`) over a finite fuel parameter, suitable for +reasoning. The relationship to `Eval.evalExpr` is captured by `ValueEquiv`. + +Stub for now — fill in once `Eval` is non-trivial. -/ + +def evalExprP (_fuel : Nat) (_env : Env) (_e : Expr) : Option (Value × Env) := + none + +end GolangLean diff --git a/golang-lean/GolangLean/REPL.lean b/golang-lean/GolangLean/REPL.lean new file mode 100644 index 0000000..9f33ea8 --- /dev/null +++ b/golang-lean/GolangLean/REPL.lean @@ -0,0 +1,24 @@ +import GolangLean.Eval + +namespace GolangLean + +/-! # REPL — placeholder. + +Go has no official REPL, but `gore` and `yaegi` show it is feasible: each +input is wrapped into a fresh `func main()` body or evaluated as a statement +sequence inside a persistent package-scope. We follow the latter approach. -/ + +partial def runREPL : IO Unit := do + IO.println "golang-lean v0.1 — interpreter scaffold (REPL not yet implemented)" + IO.println "Type Ctrl-D to exit." + let stdin ← IO.getStdin + let rec loop : IO Unit := do + IO.print "go> " + (← IO.getStdout).flush + let line ← stdin.getLine + if line.isEmpty then return () + IO.println s!" parsed: {line.trimAscii} (stub)" + loop + loop + +end GolangLean diff --git a/golang-lean/GolangLean/Scanner.lean b/golang-lean/GolangLean/Scanner.lean new file mode 100644 index 0000000..30a916a --- /dev/null +++ b/golang-lean/GolangLean/Scanner.lean @@ -0,0 +1,18 @@ +import GolangLean.Token +import GolangLean.Error + +namespace GolangLean + +/-! # Scanner — port of `go/scanner/scanner.go`. + +Stub. The real implementation is a hand-written DFA over UTF-8 byte input, +faithful to the upstream Go scanner. We start with a `notImpl` sentinel so +the project compiles, then fill in token-by-token alongside the parser. -/ + +partial def scan (src : String) : Except GoError (Array Token) := do + if src.isEmpty then + return #[⟨.eof, ⟨1, 1, 0⟩⟩] + else + .error (.notImpl "Scanner.scan: see go-upstream/src/go/scanner/scanner.go") + +end GolangLean diff --git a/golang-lean/GolangLean/Token.lean b/golang-lean/GolangLean/Token.lean new file mode 100644 index 0000000..eefefdb --- /dev/null +++ b/golang-lean/GolangLean/Token.lean @@ -0,0 +1,82 @@ +namespace GolangLean + +/-! # Token kinds — mirrors `go/token/token.go`. + +This is a faithful Lean port of the Go reference scanner's token enumeration. +Comments next to each constructor cite the Go name where it differs in casing +or spelling. Source: `go-upstream/src/go/token/token.go`. -/ + +inductive Tok where + -- Special + | illegal + | eof + | comment + -- Literals (carry their lexeme) + | ident (s : String) + | intLit (s : String) + | floatLit (s : String) + | imagLit (s : String) + | charLit (s : String) + | stringLit (s : String) + -- Operators & punctuation + | add | sub | mul | quo | rem + | and | or | xor | shl | shr | andNot + | addAssign | subAssign | mulAssign | quoAssign | remAssign + | andAssign | orAssign | xorAssign | shlAssign | shrAssign | andNotAssign + | landTok | lorTok | arrow | inc | dec + | eql | lss | gtr | assign | not + | neq | leq | geq | define | ellipsis + | lparen | lbrack | lbrace | comma | period + | rparen | rbrack | rbrace | semicolon | colon + -- Keywords + | breakK | caseK | chanK | constK | continueK + | defaultK | deferK | elseK | fallthroughK | forK + | funcK | goK | gotoK | ifK | importK + | interfaceK | mapK | packageK | rangeK | returnK + | selectK | structK | switchK | typeK | varK + -- Tilde introduced for generics (Go 1.18) + | tilde + deriving Repr, BEq, Inhabited + +/-- Source-position metadata attached to a token. -/ +structure Pos where + line : Nat + col : Nat + off : Nat + deriving Repr, BEq, Inhabited + +structure Token where + tok : Tok + pos : Pos + deriving Repr, Inhabited + +/-- Map a keyword spelling to its token, or `none` if it is just an identifier. -/ +def keyword? : String → Option Tok + | "break" => some .breakK + | "case" => some .caseK + | "chan" => some .chanK + | "const" => some .constK + | "continue" => some .continueK + | "default" => some .defaultK + | "defer" => some .deferK + | "else" => some .elseK + | "fallthrough" => some .fallthroughK + | "for" => some .forK + | "func" => some .funcK + | "go" => some .goK + | "goto" => some .gotoK + | "if" => some .ifK + | "import" => some .importK + | "interface" => some .interfaceK + | "map" => some .mapK + | "package" => some .packageK + | "range" => some .rangeK + | "return" => some .returnK + | "select" => some .selectK + | "struct" => some .structK + | "switch" => some .switchK + | "type" => some .typeK + | "var" => some .varK + | _ => none + +end GolangLean diff --git a/golang-lean/GolangLean/Value.lean b/golang-lean/GolangLean/Value.lean new file mode 100644 index 0000000..b76789b --- /dev/null +++ b/golang-lean/GolangLean/Value.lean @@ -0,0 +1,34 @@ +import GolangLean.AST + +namespace GolangLean + +/-! # Runtime values. + +Go is statically typed, so a faithful runtime keeps a `GoType` alongside each +value. For the first cut we keep things untyped (a tagged union) and recover +type safety later by adding a static type-checker pass on the AST. The shape +mirrors octive-lean's `Value`. -/ + +mutual + + inductive Value where + | nil + | boolV : Bool → Value + | intV : Int → Value + | floatV : Float → Value + | stringV : String → Value + | sliceV : Array Value → Value + | mapV : Array (Value × Value) → Value -- ordered list, equality via key + | structV : Array (String × Value) → Value + | ptrV : Nat → Value -- index into heap + | funcV : Closure → Value + | chanV : Nat → Value -- index into channel table + | tupleV : Array Value → Value -- multi-return packs + + /-- A function closure: signature + body + captured environment. -/ + inductive Closure where + | mk : FuncType → Array Stmt → List (String × Value) → Closure + +end + +end GolangLean diff --git a/golang-lean/GolangLean/ValueEquiv.lean b/golang-lean/GolangLean/ValueEquiv.lean new file mode 100644 index 0000000..40e1841 --- /dev/null +++ b/golang-lean/GolangLean/ValueEquiv.lean @@ -0,0 +1,17 @@ +import GolangLean.PureEval +import GolangLean.BigStep + +namespace GolangLean + +/-! # Equivalence between evaluators. + +Mirror of octive-lean's `ValueEquiv`. The theorem to prove is: + + `evalExprP fuel env e = some (v, env') ↔ BigStepExpr env e v env'` + (modulo fuel being large enough) + +This bridge is what lets us *use* the executable evaluator inside proofs: +once `evalExprP` is shown to compute the same relation as `BigStepExpr`, +running the interpreter is itself a proof. Stub. -/ + +end GolangLean diff --git a/golang-lean/Main.lean b/golang-lean/Main.lean new file mode 100644 index 0000000..b23b434 --- /dev/null +++ b/golang-lean/Main.lean @@ -0,0 +1,10 @@ +import GolangLean + +open GolangLean in +def main (args : List String) : IO UInt32 := do + match args with + | [] => runREPL; return 0 + | [path] => runFile path + | _ => + IO.eprintln "Usage: golang-lean [script.go]" + return 1 diff --git a/golang-lean/README.md b/golang-lean/README.md new file mode 100644 index 0000000..9464fe7 --- /dev/null +++ b/golang-lean/README.md @@ -0,0 +1,64 @@ +# golang-lean + +A Lean 4 reimplementation of the [Go programming language](https://go.dev/) — using the upstream reference compiler/parser as the source of truth, the way `octive-lean` uses GNU Octave. + +Built parallel to `octive-lean`. Goals: + +1. **Working Go interpreter** for a useful subset (start: pure Go, no goroutines). +2. **Formal semantics** layered on top: `BigStep` / `PureEval` / `ValueEquiv` so proofs about Go programs are first-class. +3. **Cross-language layer** — once both this and `octive-lean` have a real evaluator and big-step semantics, factor a shared core out of two concrete points rather than guessing the abstraction in advance. + +## Build + +```sh +lake build +``` + +Requires the Lean toolchain pinned in [`lean-toolchain`](lean-toolchain). [`elan`](https://github.com/leanprover/elan) will pick it up automatically. + +## Run + +```sh +# REPL stub +lake exe golang-lean + +# Run a .go script (not yet implemented) +lake exe golang-lean path/to/script.go + +# Verify the corpus against expected outputs +lake build corpus-check +lake exe corpus-check +``` + +## Layout + +| Path | What's there | +| --- | --- | +| `GolangLean/` | Library: `Token`, `Scanner`, `AST`, `Parser`, `Value`, `Env`, `Eval`, `Builtins`, `REPL`, `BigStep`, `PureEval`, `ValueEquiv`, `Error` | +| `Main.lean` | Entry point — REPL or file runner | +| `CorpusCheck.lean` | Test driver for `corpus/` | +| `corpus/` | `.go` test cases paired with `.expected` outputs | +| `go-upstream/` | Shallow clone of `golang/go` (gitignored, used as reference) | + +## Status + +**Scaffold.** Module skeleton in place. `Token` and `AST` are real ports of `go/token` and `go/ast`. Scanner / Parser / Eval are stubs that throw `notImpl` so the project compiles. + +Next: +1. Implement `Scanner.scan` against `go-upstream/src/go/scanner/scanner.go`. +2. Implement `Parser.parseFile` recursive-descent against `go-upstream/src/go/parser/parser.go`. +3. Implement `Eval` for the pure subset (no goroutines, no defer, no panic/recover). +4. Mirror octive-lean's `BigStep` / `ValueEquiv` triple. + +## Reference + +`go-upstream/` is a shallow clone of `https://github.com/golang/go`. Key paths: + +| Path | What's there | +| --- | --- | +| `go-upstream/src/go/token/` | Token kinds, position tracking | +| `go-upstream/src/go/scanner/` | Lexer | +| `go-upstream/src/go/ast/` | AST node types | +| `go-upstream/src/go/parser/` | Recursive-descent parser | +| `go-upstream/src/go/types/` | Type-checker (later) | +| `go-upstream/src/runtime/` | Runtime / scheduler / GC (much later) | diff --git a/golang-lean/RosettaStone.lean b/golang-lean/RosettaStone.lean new file mode 100644 index 0000000..8cbcadc --- /dev/null +++ b/golang-lean/RosettaStone.lean @@ -0,0 +1,157 @@ +import GolangLean + +/-! +# golang-lean Rosetta Stone + +Side-by-side comparisons of Go syntax with its Lean encodings. + +Two encodings are shown for each construct: + + * **Surface AST** (`GolangLean.Expr`, etc.) — the parser's target. + Mirrors `go/ast.Node` from the upstream Go reference compiler. + Faithful to Go's grammar but has no formal semantics yet (the + surface evaluator is a stub). + + * **Tiny Go Core** (`GolangLean.Core.Term`) — the kernel calculus. + Smaller, typed, with proven big-step semantics, executable + evaluator, determinism, and preservation. Real Go programs would + desugar into this kernel via an elaborator (future work). + +Where applicable, `#eval` runs the term through the proven evaluator. +The result is `some (Value, Heap)` on success. +-/ + +open GolangLean + +-- A tiny convenience for running TGC programs. +def runTGC (e : Core.Term) : Option (Core.Value × Core.Heap) := + Core.eval 1000 #[] .nil e + +/-! ## §1 Literals — atoms of any language. -/ + +-- Go: `42` +def lit42_AST : Expr := .intLit 42 +def lit42_TGC : Core.Term := .intLit 42 +#eval runTGC lit42_TGC -- some (vInt 42, []) + +-- Go: `true` (a predeclared identifier in Go) +def litTrue_AST : Expr := .ident "true" +def litTrue_TGC : Core.Term := .boolLit true +#eval runTGC litTrue_TGC -- some (vBool true, []) + +/-! ## §2 Arithmetic and comparison + +Go's binary operators map directly to TGC's `binop`. -/ + +-- Go: `5 + 3` +def add5_3_AST : Expr := .binOp .add (.intLit 5) (.intLit 3) +def add5_3_TGC : Core.Term := .binop .add (.intLit 5) (.intLit 3) +#eval runTGC add5_3_TGC -- some (vInt 8, []) + +-- Go: `(5 + 3) * 2` +def expr_AST : Expr := + .binOp .mul (.binOp .add (.intLit 5) (.intLit 3)) (.intLit 2) +def expr_TGC : Core.Term := + .binop .mul (.binop .add (.intLit 5) (.intLit 3)) (.intLit 2) +#eval runTGC expr_TGC -- some (vInt 16, []) + +-- Go: `x < y` +def less_AST : Expr := .binOp .lss (.ident "x") (.ident "y") +def less_TGC : Core.Term := .binop .lt (.var "x") (.var "y") +-- Needs an env binding x and y — see §4. + +-- Go: `x == 5` +def eq_AST : Expr := .binOp .eql (.ident "x") (.intLit 5) +def eq_TGC : Core.Term := .binop .eq (.var "x") (.intLit 5) + +/-! ## §3 Conditionals + +Go's `if` statement vs. TGC's `ifte` expression form. -/ + +-- Go (statement): `if x < 10 { return 1 } else { return 2 }` +-- Go has no ternary expression; TGC's `ifte` is closer to ML's +-- `if ... then ... else ...` for proof-friendliness. +def ifte_TGC : Core.Term := + .ifte (.binop .lt (.var "x") (.intLit 10)) + (.intLit 1) (.intLit 2) + +/-! ## §4 Variables and let-binding + +Go's `x := 42` ↦ TGC's `letIn` (lexical binding, scoped to body). +For *mutating* variables, see §6 (TGC uses `assign` on a heap-allocated +ref). -/ + +-- Go: `x := 42; x + 1` +def letIn_TGC : Core.Term := + .letIn "x" (.intLit 42) (.binop .add (.var "x") (.intLit 1)) +#eval runTGC letIn_TGC -- some (vInt 43, []) + +-- Two bindings: `x := 3; y := 7; x < y` +def less_run : Core.Term := + .letIn "x" (.intLit 3) + (.letIn "y" (.intLit 7) + (.binop .lt (.var "x") (.var "y"))) +#eval runTGC less_run -- some (vBool true, []) + +/-! ## §5 Functions and application + +Go's anonymous function `func(x int) int { return x*2 }` ↦ TGC's +`lam` (a closure). Application is `app`. -/ + +-- Go: `(func(x int) int { return x*2 })(21)` +def lambda_app_TGC : Core.Term := + .app (.lam "x" (.binop .mul (.var "x") (.intLit 2))) (.intLit 21) +#eval runTGC lambda_app_TGC -- some (vInt 42, []) + +-- Higher-order. With `double = func(x) { x*2 }`: +-- `(func(f) func(x) { f(f(x)) })(double)(3)` = double(double(3)) = 12 +def higher_order_TGC : Core.Term := + .app + (.app + (.lam "f" + (.lam "x" (.app (.var "f") (.app (.var "f") (.var "x"))))) + (.lam "x" (.binop .mul (.var "x") (.intLit 2)))) + (.intLit 3) +#eval runTGC higher_order_TGC -- some (vInt 12, []) + +/-! ## §6 References, dereference, assignment + +Go's `&x`, `*p`, `*p = e` ↦ TGC's `refMk`, `deref`, `assign`. +The heap (third tuple component, an `Array Value`) accumulates +allocations. -/ + +-- Go: `p := &42; *p` +def ref_deref_TGC : Core.Term := + .letIn "p" (.refMk (.intLit 42)) (.deref (.var "p")) +#eval runTGC ref_deref_TGC -- some (vInt 42, #[vInt 42]) + +-- Go: `p := &42; *p = 100; *p` +def ref_assign_TGC : Core.Term := + .letIn "p" (.refMk (.intLit 42)) + (.seq (.assign (.var "p") (.intLit 100)) + (.deref (.var "p"))) +#eval runTGC ref_assign_TGC -- some (vInt 100, #[vInt 100]) + +/-! ## §7 Sequencing — Go statement separator. -/ + +-- Go: `_ = 1; 42` +def seq_TGC : Core.Term := .seq (.intLit 1) (.intLit 42) +#eval runTGC seq_TGC -- some (vInt 42, []) + +/-! ## §8 Theorems applicable to these terms + + * `Core.BigStep.deterministic` — the relation is functional. + * `Core.eval_sound` — every successful eval is a `BigStep` derivation. + * `Core.preservation` — well-typed terms produce well-typed values. + +Each `#eval` above is a *runtime witness* for a `BigStep` derivation, +which by `eval_sound` is also a proof that the term has the shown +value. Type-check the term and apply `preservation` to get the value's +type for free. -/ + +-- Demo: a small composition showing TGC's expressive range. +-- `let double = λ x. x*2 in double (double 5)` = 20 +def double_twice : Core.Term := + .letIn "double" (.lam "x" (.binop .mul (.var "x") (.intLit 2))) + (.app (.var "double") (.app (.var "double") (.intLit 5))) +#eval runTGC double_twice -- some (vInt 20, []) diff --git a/golang-lean/corpus/.gitkeep b/golang-lean/corpus/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/golang-lean/justfile b/golang-lean/justfile new file mode 100644 index 0000000..fc876e7 --- /dev/null +++ b/golang-lean/justfile @@ -0,0 +1,24 @@ +# Common project tasks. Run `just` to list. + +default: + @just --list + +build: + lake build + +repl: + lake exe golang-lean + +run script: + lake exe golang-lean {{script}} + +test: + lake build && lake exe corpus-check + +update-corpus: + lake build && lake exe corpus-check --update + +clean: + lake clean + +fresh: clean build diff --git a/golang-lean/lake-manifest.json b/golang-lean/lake-manifest.json new file mode 100644 index 0000000..ce3ab12 --- /dev/null +++ b/golang-lean/lake-manifest.json @@ -0,0 +1,6 @@ +{"version": "1.2.0", + "packagesDir": ".lake/packages", + "packages": [], + "name": "«golang-lean»", + "lakeDir": ".lake", + "fixedToolchain": false} diff --git a/golang-lean/lakefile.toml b/golang-lean/lakefile.toml new file mode 100644 index 0000000..3dd4c33 --- /dev/null +++ b/golang-lean/lakefile.toml @@ -0,0 +1,17 @@ +name = "golang-lean" +version = "0.1.0" +defaultTargets = ["golang-lean"] + +[[lean_lib]] +name = "GolangLean" + +[[lean_lib]] +name = "RosettaStone" + +[[lean_exe]] +name = "golang-lean" +root = "Main" + +[[lean_exe]] +name = "corpus-check" +root = "CorpusCheck" diff --git a/golang-lean/lean-toolchain b/golang-lean/lean-toolchain new file mode 100644 index 0000000..6c7e31f --- /dev/null +++ b/golang-lean/lean-toolchain @@ -0,0 +1 @@ +leanprover/lean4:v4.30.0-rc2