infoductor/Infoductor/Foundation/MetaParse.lean
Maximus Gorog 62b73bb176 Atomic Phase 3 witnesses via decide
Five kernel-rooted atomic-shape witnesses for `tokenize ∘ render = toTokens`:
  · tokenize_render_name_anonymous
  · tokenize_render_classifier_always
  · tokenize_render_classifier_never
  · tokenize_render_cterm_empty
  · tokenize_render_artifact_empty

Each closes via `decide` with `maxRecDepth 2000` — the kernel
fully reduces the entire chain (toLeanSource → String.toList →
tokenize → comparison) on the closed atomic input.

Recursive arms (.str, .num, .app, .lam, .comp, .transp, .meet, .join,
.cterm, .refTo, .source) require the four substantial distribution
lemmas (readIdent_app, readStrLit_app, tokenize_app_clean,
tokenize_render_X by induction) — documented as future work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 13:03:51 -06:00

952 lines
41 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Infoductor.Foundation.MetaParse — the inverse of `toLeanSource`
=================================================================
A hand-written tokenizer + recursive-descent parser that reads
the Lean source emitted by the `toLeanSource` family of renderers
and reconstructs the original `MetaCTerm` / `MetaClassifier` /
`MetaArtifact` / `Lean.Name`.
The bridge's loop closes at the *string* level, in Lean itself:
no external elaborator needed.
Implementation strategy:
· `Token` is a four-arm inductive (parens, ident chains, string
literals, num literals).
· `tokenize` is fuel-based, so it's structurally recursive on
the fuel parameter (no `partial` needed) — fuel is the input
list length, which is always sufficient.
· Parsers per type are likewise fuel-based. Each successful
parse consumes ≥ 1 token, so fuel = `tokens.length + 1` is
always enough for any well-formed input.
· String escape: only `"` and `\` are escaped (renderer-side
`escapeStrLit` mirrors this exactly).
-/
import Infoductor.Foundation.Meta
namespace Infoductor
-- ── Tokens ──────────────────────────────────────────────────────────────────
inductive Token where
| lparen : Token
| rparen : Token
| ident : String → Token
| strLit : String → Token
| numLit : Nat → Token
deriving Repr, DecidableEq
-- ── Character predicates ───────────────────────────────────────────────────
def isIdentStartChar (c : Char) : Bool :=
c.isAlpha || c == '_'
def isIdentRestChar (c : Char) : Bool :=
c.isAlpha || c.isDigit || c == '_' || c == '.'
def isWhitespace (c : Char) : Bool :=
c == ' ' || c == '\n' || c == '\t' || c == '\r'
-- ── Tokenizer helpers (fuel-based for kernel termination) ──────────────────
/-- Read identifier chars until a non-identifier char. Returns
(read string, remaining chars). Fuel-based for kernel
termination; the tokenizer feeds in `chars.length` fuel
which is always sufficient. -/
def readIdent : Nat → List Char → String → String × List Char
| 0, chars, acc => (acc, chars)
| _+1, [], acc => (acc, [])
| n+1, c :: rest, acc =>
if isIdentRestChar c then readIdent n rest (acc.push c)
else (acc, c :: rest)
/-- Read a decimal number. Fuel-based. -/
def readNum : Nat → List Char → Nat → Nat × List Char
| 0, chars, acc => (acc, chars)
| _+1, [], acc => (acc, [])
| n+1, c :: rest, acc =>
if c.isDigit then
readNum n rest (acc * 10 + (c.toNat - '0'.toNat))
else (acc, c :: rest)
/-- Read characters of a string literal until the closing `"`,
handling `\"` and `\\` escapes. Fuel-based. -/
def readStrLit : Nat → List Char → String → Option (String × List Char)
| 0, _, _ => none
| _+1, [], _ => none
| _+1, '"' :: rest, acc => some (acc, rest)
| n+1, '\\' :: '"' :: rest, acc => readStrLit n rest (acc.push '"')
| n+1, '\\' :: '\\' :: rest, acc => readStrLit n rest (acc.push '\\')
| n+1, c :: rest, acc => readStrLit n rest (acc.push c)
-- `escapeStrLit` lives in Foundation.Meta now (used by both
-- the renderer and the parser); see Meta.lean for the definition.
-- ── Tokenizer ──────────────────────────────────────────────────────────────
/-- Tokenize a `List Char` into `List Token`. Fuel-based for
kernel termination; the top-level `tokenize` provides
`chars.length + 1` fuel, which is always sufficient. -/
def tokenizeAux : Nat → List Char → List Token
| 0, _ => []
| _+1, [] => []
| n+1, '(' :: rest => Token.lparen :: tokenizeAux n rest
| n+1, ')' :: rest => Token.rparen :: tokenizeAux n rest
| n+1, '"' :: rest =>
match readStrLit (n+1) rest "" with
| some (s, rest') => Token.strLit s :: tokenizeAux n rest'
| none => []
| n+1, c :: rest =>
if isWhitespace c then tokenizeAux n rest
else if c.isDigit then
let (num, rest') := readNum (n+1) (c :: rest) 0
Token.numLit num :: tokenizeAux n rest'
else if isIdentStartChar c then
let (ident, rest') := readIdent (n+1) (c :: rest) ""
Token.ident ident :: tokenizeAux n rest'
else
[]
/-- Top-level tokenizer. Fuel = `chars.length + 1` is always
sufficient since each token-emitting branch peels ≥ 1 char. -/
def tokenize (chars : List Char) : List Token :=
tokenizeAux (chars.length + 1) chars
/-- Tokenize a `String` directly. -/
def tokenizeStr (s : String) : List Token :=
tokenize s.toList
-- ── Parsers ────────────────────────────────────────────────────────────────
-- All parsers fuel-based. Recursion structurally on the Nat
-- parameter, so they're total without `partial`.
/-- Parse a `Lean.Name`. Recognises:
· `Lean.Name.anonymous` (atomic)
· `(Lean.Name.str <name> "<str>")` (recursive)
· `(Lean.Name.num <name> <nat>)` (recursive)
The renderer no longer wraps recursive sub-name calls in
extra parens (post-fix), so the parser handles only the
canonical bare-atomic + constructor-app forms. -/
def parseName?Aux : Nat → List Token → Option (Lean.Name × List Token)
| 0, _ => none
| _+1, Token.ident "Lean.Name.anonymous" :: rest =>
some (Lean.Name.anonymous, rest)
| n+1, Token.lparen :: Token.ident "Lean.Name.str" :: rest =>
match parseName?Aux n rest with
| some (p, Token.strLit s :: Token.rparen :: rest') =>
some (Lean.Name.str p s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Lean.Name.num" :: rest =>
match parseName?Aux n rest with
| some (p, Token.numLit k :: Token.rparen :: rest') =>
some (Lean.Name.num p k, rest')
| _ => none
| _, _ => none
/-- Parse a `MetaClassifier`. -/
def parseClassifier?Aux : Nat → List Token → Option (MetaClassifier × List Token)
| 0, _ => none
| _+1, Token.ident "Infoductor.MetaClassifier.always" :: rest =>
some (MetaClassifier.always, rest)
| _+1, Token.ident "Infoductor.MetaClassifier.never" :: rest =>
some (MetaClassifier.never, rest)
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.atDecl" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') => some (MetaClassifier.atDecl nm, rest')
| _ => none
| _+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inFile" :: rest =>
match rest with
| Token.strLit s :: Token.rparen :: rest' =>
some (MetaClassifier.inFile s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.underAttribute" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaClassifier.underAttribute nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.dependencyOf" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaClassifier.dependencyOf nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inNamespace" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaClassifier.inNamespace nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.meet" :: rest =>
match parseClassifier?Aux n rest with
| some (a, rest') =>
match parseClassifier?Aux n rest' with
| some (b, Token.rparen :: rest'') =>
some (MetaClassifier.meet a b, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.join" :: rest =>
match parseClassifier?Aux n rest with
| some (a, rest') =>
match parseClassifier?Aux n rest' with
| some (b, Token.rparen :: rest'') =>
some (MetaClassifier.join a b, rest'')
| _ => none
| _ => none
| _, _ => none
/-- Parse a `MetaCTerm`. -/
def parseMetaCTerm?Aux : Nat → List Token → Option (MetaCTerm × List Token)
| 0, _ => none
| _+1, Token.ident "Infoductor.MetaCTerm.empty" :: rest =>
some (MetaCTerm.empty, rest)
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.ident" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') => some (MetaCTerm.ident nm, rest')
| _ => none
| _+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.sym" :: rest =>
match rest with
| Token.strLit s :: Token.rparen :: rest' =>
some (MetaCTerm.sym s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.app" :: rest =>
match parseMetaCTerm?Aux n rest with
| some (f, rest') =>
match parseMetaCTerm?Aux n rest' with
| some (a, Token.rparen :: rest'') =>
some (MetaCTerm.app f a, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.lam" :: rest =>
match rest with
| Token.strLit x :: rest' =>
match parseMetaCTerm?Aux n rest' with
| some (t, Token.rparen :: rest'') => some (MetaCTerm.lam x t, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.plam" :: rest =>
match rest with
| Token.strLit i :: rest' =>
match parseMetaCTerm?Aux n rest' with
| some (t, Token.rparen :: rest'') => some (MetaCTerm.plam i t, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.comp" :: rest =>
match rest with
| Token.strLit s :: rest1 =>
match parseMetaCTerm?Aux n rest1 with
| some (A, rest2) =>
match parseClassifier?Aux n rest2 with
| some (φ, rest3) =>
match parseMetaCTerm?Aux n rest3 with
| some (u, rest4) =>
match parseMetaCTerm?Aux n rest4 with
| some (t, Token.rparen :: rest5) =>
some (MetaCTerm.comp s A φ u t, rest5)
| _ => none
| _ => none
| _ => none
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.transp" :: rest =>
match rest with
| Token.strLit s :: rest1 =>
match parseMetaCTerm?Aux n rest1 with
| some (A, rest2) =>
match parseClassifier?Aux n rest2 with
| some (φ, rest3) =>
match parseMetaCTerm?Aux n rest3 with
| some (t, Token.rparen :: rest4) =>
some (MetaCTerm.transp s A φ t, rest4)
| _ => none
| _ => none
| _ => none
| _ => none
| _, _ => none
/-- Parse a `MetaArtifact`. -/
def parseArtifact?Aux : Nat → List Token → Option (MetaArtifact × List Token)
| 0, _ => none
| _+1, Token.ident "Infoductor.MetaArtifact.empty" :: rest =>
some (MetaArtifact.empty, rest)
| _+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.source" :: rest =>
match rest with
| Token.strLit s :: Token.rparen :: rest' =>
some (MetaArtifact.source s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.refTo" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaArtifact.refTo nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.cterm" :: rest =>
match parseMetaCTerm?Aux n rest with
| some (m, Token.rparen :: rest') =>
some (MetaArtifact.cterm m, rest')
| _ => none
| _, _ => none
-- ── Top-level wrappers ─────────────────────────────────────────────────────
-- Fuel = `tokens.length + 1` is always sufficient: every successful
-- parse arm consumes ≥ 1 token, so depth is bounded by length.
def parseName? (tokens : List Token) : Option (Lean.Name × List Token) :=
parseName?Aux (tokens.length + 1) tokens
def parseClassifier? (tokens : List Token) : Option (MetaClassifier × List Token) :=
parseClassifier?Aux (tokens.length + 1) tokens
def parseMetaCTerm? (tokens : List Token) : Option (MetaCTerm × List Token) :=
parseMetaCTerm?Aux (tokens.length + 1) tokens
def parseArtifact? (tokens : List Token) : Option (MetaArtifact × List Token) :=
parseArtifact?Aux (tokens.length + 1) tokens
def MetaCTerm.fromLeanSource? (s : String) : Option MetaCTerm :=
match parseMetaCTerm? (tokenizeStr s) with
| some (t, []) => some t
| _ => none
def MetaClassifier.fromLeanSource? (s : String) : Option MetaClassifier :=
match parseClassifier? (tokenizeStr s) with
| some (φ, []) => some φ
| _ => none
def MetaArtifact.fromLeanSource? (s : String) : Option MetaArtifact :=
match parseArtifact? (tokenizeStr s) with
| some (a, []) => some a
| _ => none
-- ── Canonical token forms — direct meta-mirror → List Token ────────────────
-- Each function mirrors the corresponding `toLeanSource` renderer
-- but produces tokens directly, bypassing the String layer. These
-- are the canonical token forms against which parser correctness
-- is proven. The relationship `tokenize ∘ toLeanSource = toTokens`
-- is established separately as a String-level lemma.
def nameToTokens : Lean.Name → List Token
| .anonymous => [Token.ident "Lean.Name.anonymous"]
| .str p s =>
[Token.lparen, Token.ident "Lean.Name.str"] ++ nameToTokens p ++
[Token.strLit s, Token.rparen]
| .num p k =>
[Token.lparen, Token.ident "Lean.Name.num"] ++ nameToTokens p ++
[Token.numLit k, Token.rparen]
def MetaClassifier.toTokens : MetaClassifier → List Token
| .always => [Token.ident "Infoductor.MetaClassifier.always"]
| .never => [Token.ident "Infoductor.MetaClassifier.never"]
| .atDecl n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.atDecl"] ++
nameToTokens n ++ [Token.rparen]
| .inFile s =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.inFile",
Token.strLit s, Token.rparen]
| .underAttribute n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.underAttribute"] ++
nameToTokens n ++ [Token.rparen]
| .dependencyOf n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.dependencyOf"] ++
nameToTokens n ++ [Token.rparen]
| .inNamespace n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.inNamespace"] ++
nameToTokens n ++ [Token.rparen]
| .meet a b =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.meet"] ++
toTokens a ++ toTokens b ++ [Token.rparen]
| .join a b =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.join"] ++
toTokens a ++ toTokens b ++ [Token.rparen]
def MetaCTerm.toTokens : MetaCTerm → List Token
| .ident n =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.ident"] ++
nameToTokens n ++ [Token.rparen]
| .sym s =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.sym",
Token.strLit s, Token.rparen]
| .app f a =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.app"] ++
toTokens f ++ toTokens a ++ [Token.rparen]
| .lam x t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.lam",
Token.strLit x] ++ toTokens t ++ [Token.rparen]
| .plam i t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.plam",
Token.strLit i] ++ toTokens t ++ [Token.rparen]
| .comp s A φ u t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.comp",
Token.strLit s] ++ toTokens A ++ φ.toTokens ++
toTokens u ++ toTokens t ++ [Token.rparen]
| .transp s A φ t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.transp",
Token.strLit s] ++ toTokens A ++ φ.toTokens ++
toTokens t ++ [Token.rparen]
| .empty => [Token.ident "Infoductor.MetaCTerm.empty"]
def MetaArtifact.toTokens : MetaArtifact → List Token
| .source s => [Token.lparen, Token.ident "Infoductor.MetaArtifact.source",
Token.strLit s, Token.rparen]
| .declAt _ => [] -- not source-renderable; toTokens undefined for this arm
| .cterm m => [Token.lparen, Token.ident "Infoductor.MetaArtifact.cterm"] ++
m.toTokens ++ [Token.rparen]
| .refTo n => [Token.lparen, Token.ident "Infoductor.MetaArtifact.refTo"] ++
nameToTokens n ++ [Token.rparen]
| .empty => [Token.ident "Infoductor.MetaArtifact.empty"]
-- ── Size measures (for fuel bookkeeping in proofs) ─────────────────────────
def nameDepth : Lean.Name → Nat
| .anonymous => 1
| .str p _ => nameDepth p + 1
| .num p _ => nameDepth p + 1
def classifierDepth : MetaClassifier → Nat
| .always | .never => 1
| .atDecl n => nameDepth n + 1
| .inFile _ => 2
| .underAttribute n => nameDepth n + 1
| .dependencyOf n => nameDepth n + 1
| .inNamespace n => nameDepth n + 1
| .meet a b => max (classifierDepth a) (classifierDepth b) + 1
| .join a b => max (classifierDepth a) (classifierDepth b) + 1
def cTermDepth : MetaCTerm → Nat
| .ident n => nameDepth n + 1
| .sym _ => 2
| .empty => 1
| .lam _ t | .plam _ t => cTermDepth t + 1
| .app f a => max (cTermDepth f) (cTermDepth a) + 1
| .comp _ A φ u t =>
max (max (cTermDepth A) (classifierDepth φ))
(max (cTermDepth u) (cTermDepth t)) + 1
| .transp _ A φ t =>
max (max (cTermDepth A) (classifierDepth φ)) (cTermDepth t) + 1
def artifactDepth : MetaArtifact → Nat
| .source _ => 2
| .declAt _ => 0 -- not source-encodable; round-trip excluded
| .cterm m => cTermDepth m + 1
| .refTo n => nameDepth n + 1
| .empty => 1
/-- A `MetaArtifact` is *supported* by the round-trip iff it does
not carry a `Lean.Syntax` (which cannot be source-rendered).
All other arms are supported. -/
def MetaArtifact.supported : MetaArtifact → Bool
| .declAt _ => false
| _ => true
-- ── Parser correctness on toTokens (Phase 2) ──────────────────────────────
-- Universal theorems witnessing that each parser, when fed the
-- canonical token form of a value plus arbitrary trailing tokens,
-- reconstructs the value exactly and leaves the trailing tokens
-- untouched. Provided fuel is at least the depth of the value.
-- Proofs are by structural induction on the meta-mirror; no
-- String reasoning is needed.
theorem parseName?Aux_correct :
∀ (n : Lean.Name) (rest : List Token) (fuel : Nat),
fuel ≥ nameDepth n →
parseName?Aux fuel (nameToTokens n ++ rest) = some (n, rest)
| .anonymous, rest, fuel, h => by
rcases fuel with _ | f
· simp [nameDepth] at h
· simp [nameToTokens, parseName?Aux]
| .str p s, rest, fuel, h => by
rcases fuel with _ | f
· simp [nameDepth] at h
· simp only [nameToTokens, List.append_assoc, List.cons_append,
List.nil_append, parseName?Aux]
have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega
have ih := parseName?Aux_correct p
(Token.strLit s :: Token.rparen :: rest) f hp
rw [ih]
| .num p k, rest, fuel, h => by
rcases fuel with _ | f
· simp [nameDepth] at h
· simp only [nameToTokens, List.append_assoc, List.cons_append,
List.nil_append, parseName?Aux]
have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega
have ih := parseName?Aux_correct p
(Token.numLit k :: Token.rparen :: rest) f hp
rw [ih]
theorem parseClassifier?Aux_correct :
∀ (φ : MetaClassifier) (rest : List Token) (fuel : Nat),
fuel ≥ classifierDepth φ →
parseClassifier?Aux fuel (φ.toTokens ++ rest) = some (φ, rest)
| .always, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp [MetaClassifier.toTokens, parseClassifier?Aux]
| .never, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp [MetaClassifier.toTokens, parseClassifier?Aux]
| .atDecl n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .inFile s, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp [MetaClassifier.toTokens, parseClassifier?Aux]
| .underAttribute n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by
simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .dependencyOf n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by
simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .inNamespace n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by
simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .meet a b, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have ha : f ≥ classifierDepth a := by
simp [classifierDepth] at h; omega
have hb : f ≥ classifierDepth b := by
simp [classifierDepth] at h; omega
simp only [parseClassifier?Aux_correct a _ f ha,
parseClassifier?Aux_correct b _ f hb]
| .join a b, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have ha : f ≥ classifierDepth a := by
simp [classifierDepth] at h; omega
have hb : f ≥ classifierDepth b := by
simp [classifierDepth] at h; omega
simp only [parseClassifier?Aux_correct a _ f ha,
parseClassifier?Aux_correct b _ f hb]
theorem parseMetaCTerm?Aux_correct :
∀ (t : MetaCTerm) (rest : List Token) (fuel : Nat),
fuel ≥ cTermDepth t →
parseMetaCTerm?Aux fuel (t.toTokens ++ rest) = some (t, rest)
| .empty, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp [MetaCTerm.toTokens, parseMetaCTerm?Aux]
| .ident n, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hn : f ≥ nameDepth n := by simp [cTermDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .sym s, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp [MetaCTerm.toTokens, parseMetaCTerm?Aux]
| .app fa aa, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hf : f ≥ cTermDepth fa := by simp [cTermDepth] at h; omega
have ha : f ≥ cTermDepth aa := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct fa _ f hf,
parseMetaCTerm?Aux_correct aa _ f ha]
| .lam x t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct t _ f ht]
| .plam i t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct t _ f ht]
| .comp s A φ u t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega
have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega
have hu : f ≥ cTermDepth u := by simp [cTermDepth] at h; omega
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct A _ f hA,
parseClassifier?Aux_correct φ _ f hφ,
parseMetaCTerm?Aux_correct u _ f hu,
parseMetaCTerm?Aux_correct t _ f ht]
| .transp s A φ t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega
have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct A _ f hA,
parseClassifier?Aux_correct φ _ f hφ,
parseMetaCTerm?Aux_correct t _ f ht]
theorem parseArtifact?Aux_correct :
∀ (a : MetaArtifact) (rest : List Token) (fuel : Nat),
a.supported = true →
fuel ≥ artifactDepth a →
parseArtifact?Aux fuel (a.toTokens ++ rest) = some (a, rest)
| .empty, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp [MetaArtifact.toTokens, parseArtifact?Aux]
| .source s, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp [MetaArtifact.toTokens, parseArtifact?Aux]
| .refTo n, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseArtifact?Aux]
have hn : f ≥ nameDepth n := by simp [artifactDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .cterm m, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseArtifact?Aux]
have hm : f ≥ cTermDepth m := by simp [artifactDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct m _ f hm]
| .declAt _, _, _, hsup, _ => by
simp [MetaArtifact.supported] at hsup
-- ── Token-level round-trip — the canonical universal theorems ─────────────
-- These follow directly from the `parser?Aux_correct` lemmas above
-- by setting `rest = []` and `fuel = depth + k` (any sufficient
-- fuel). They're the cleanest universal statements: the parser,
-- starting from a value's canonical token form, recovers the
-- value with no leftover tokens. No String reasoning needed.
/-- Top-level token-entry parser. Tokenisation is presentation;
this is the *algorithmic* round-trip's input. -/
def MetaCTerm.fromTokens? (tokens : List Token) : Option MetaCTerm :=
match parseMetaCTerm? tokens with
| some (t, []) => some t
| _ => none
def MetaClassifier.fromTokens? (tokens : List Token) : Option MetaClassifier :=
match parseClassifier? tokens with
| some (φ, []) => some φ
| _ => none
def Infoductor.nameFromTokens? (tokens : List Token) : Option Lean.Name :=
match parseName? tokens with
| some (n, []) => some n
| _ => none
def MetaArtifact.fromTokens? (tokens : List Token) : Option MetaArtifact :=
match parseArtifact? tokens with
| some (a, []) => some a
| _ => none
-- Length-vs-depth lemmas: each toTokens output is at least as long
-- as the depth of the value, so `tokens.length + 1` is always
-- sufficient fuel.
theorem nameToTokens_length_bound (n : Lean.Name) :
(nameToTokens n).length + 1 ≥ nameDepth n := by
induction n with
| anonymous => simp [nameToTokens, nameDepth]
| str p s ih => simp [nameToTokens, nameDepth]; omega
| num p k ih => simp [nameToTokens, nameDepth]; omega
theorem classifierToTokens_length_bound (φ : MetaClassifier) :
(φ.toTokens).length + 1 ≥ classifierDepth φ := by
induction φ with
| always => simp [MetaClassifier.toTokens, classifierDepth]
| never => simp [MetaClassifier.toTokens, classifierDepth]
| atDecl n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| inFile s => simp [MetaClassifier.toTokens, classifierDepth]
| underAttribute n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| dependencyOf n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| inNamespace n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| meet a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega
| join a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega
theorem cTermToTokens_length_bound (t : MetaCTerm) :
(t.toTokens).length + 1 ≥ cTermDepth t := by
induction t with
| empty => simp [MetaCTerm.toTokens, cTermDepth]
| ident n =>
simp [MetaCTerm.toTokens, cTermDepth]
have := nameToTokens_length_bound n; omega
| sym s => simp [MetaCTerm.toTokens, cTermDepth]
| app f a ihf iha => simp [MetaCTerm.toTokens, cTermDepth]; omega
| lam x t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega
| plam i t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega
| comp s A φ u t ihA ihu iht =>
simp [MetaCTerm.toTokens, cTermDepth]
have := classifierToTokens_length_bound φ
omega
| transp s A φ t ihA iht =>
simp [MetaCTerm.toTokens, cTermDepth]
have := classifierToTokens_length_bound φ
omega
/-- Every meta-mirror value's canonical token form parses back to
the same value. Universal — by structural induction on the
meta-mirror type, with sufficient fuel guaranteed by the
length-vs-depth lemma above. -/
theorem nameFromTokens?_round_trip (n : Lean.Name) :
Infoductor.nameFromTokens? (nameToTokens n) = some n := by
unfold Infoductor.nameFromTokens? parseName?
have h := nameToTokens_length_bound n
have := parseName?Aux_correct n [] ((nameToTokens n).length + 1) h
rw [List.append_nil] at this
rw [this]
theorem classifierFromTokens?_round_trip (φ : MetaClassifier) :
MetaClassifier.fromTokens? φ.toTokens = some φ := by
unfold MetaClassifier.fromTokens? parseClassifier?
have h := classifierToTokens_length_bound φ
have := parseClassifier?Aux_correct φ [] ((φ.toTokens).length + 1) h
rw [List.append_nil] at this
rw [this]
theorem cTermFromTokens?_round_trip (t : MetaCTerm) :
MetaCTerm.fromTokens? t.toTokens = some t := by
unfold MetaCTerm.fromTokens? parseMetaCTerm?
have h := cTermToTokens_length_bound t
have := parseMetaCTerm?Aux_correct t [] ((t.toTokens).length + 1) h
rw [List.append_nil] at this
rw [this]
theorem artifactFromTokens?_round_trip (a : MetaArtifact)
(hsup : a.supported = true) :
MetaArtifact.fromTokens? a.toTokens = some a := by
unfold MetaArtifact.fromTokens? parseArtifact?
-- artifact length bound
have hlen : (a.toTokens).length + 1 ≥ artifactDepth a := by
cases a with
| empty => simp [MetaArtifact.toTokens, artifactDepth]
| source s => simp [MetaArtifact.toTokens, artifactDepth]
| declAt _ => simp [MetaArtifact.supported] at hsup
| cterm m =>
simp [MetaArtifact.toTokens, artifactDepth]
have := cTermToTokens_length_bound m; omega
| refTo n =>
simp [MetaArtifact.toTokens, artifactDepth]
have := nameToTokens_length_bound n; omega
have := parseArtifact?Aux_correct a [] ((a.toTokens).length + 1) hsup hlen
rw [List.append_nil] at this
rw [this]
-- ── Phase 3: tokenize ∘ render = toTokens ─────────────────────────────────
-- The String-level half. Foundation lemmas about tokenize's
-- behaviour, then induction over each meta-mirror type.
/-- `tokenize` on `(` :: rest reduces to `lparen :: tokenize rest`. -/
theorem tokenize_lparen_cons (rest : List Char) :
tokenize ('(' :: rest) = Token.lparen :: tokenize rest := by
simp [tokenize, tokenizeAux]
/-- `tokenize` on `)` :: rest reduces to `rparen :: tokenize rest`. -/
theorem tokenize_rparen_cons (rest : List Char) :
tokenize (')' :: rest) = Token.rparen :: tokenize rest := by
simp [tokenize, tokenizeAux]
/-- `tokenize` skips a leading space. -/
theorem tokenize_space_cons (rest : List Char) :
tokenize (' ' :: rest) = tokenize rest := by
simp [tokenize, tokenizeAux, isWhitespace]
-- Atomic-arm Phase 3 witnesses via `decide`. Each closed input
-- reduces in the kernel to a concrete token list; we prove
-- structural identity between `tokenize (toLeanSource v).toList`
-- and `toTokens v` for these atomic shapes.
set_option maxRecDepth 2000 in
theorem tokenize_render_name_anonymous :
tokenize (nameToLeanSource Lean.Name.anonymous).toList =
nameToTokens Lean.Name.anonymous := by
decide
set_option maxRecDepth 2000 in
theorem tokenize_render_classifier_always :
tokenize (MetaClassifier.toLeanSource MetaClassifier.always).toList =
MetaClassifier.toTokens MetaClassifier.always := by
decide
set_option maxRecDepth 2000 in
theorem tokenize_render_classifier_never :
tokenize (MetaClassifier.toLeanSource MetaClassifier.never).toList =
MetaClassifier.toTokens MetaClassifier.never := by
decide
set_option maxRecDepth 2000 in
theorem tokenize_render_cterm_empty :
tokenize (MetaCTerm.toLeanSource MetaCTerm.empty).toList =
MetaCTerm.toTokens MetaCTerm.empty := by
decide
set_option maxRecDepth 2000 in
theorem tokenize_render_artifact_empty :
tokenize (MetaArtifact.toLeanSource MetaArtifact.empty).toList =
MetaArtifact.toTokens MetaArtifact.empty := by
decide
-- The recursive arms (`.str`, `.app`, `.lam`, etc.) require the
-- four substantial lemmas sketched below — multi-page Lean
-- reasoning about `readIdent` / `readStrLit` distribution.
-- Documented as future work; the token-level universal above
-- plus `decide` on closed instances cover the round-trip
-- operationally in the meantime.
--
-- readIdent_app : readIdent on (ident-chars ++ rest) where
-- rest starts cleanly returns (acc ++ ident, rest).
-- readStrLit_app : readStrLit on (escapeStrLit body ++ '\"' ++ rest)
-- returns (body, rest).
-- tokenize_app_clean: tokenize distributes over a concatenation
-- where the prefix ends "cleanly".
-- tokenize_render_X: induction over each meta-mirror type using
-- the above plus IH on sub-values.
-- ── Round-trip — atomic kernel-reducible witnesses ─────────────────────────
-- For non-recursive shapes the round-trip is closed by `rfl` (or
-- `decide`) directly: rendering produces a fixed string, tokenising
-- produces a fixed token list, parsing returns the original. These
-- closed-form witnesses live here in `Foundation` (no example-level
-- Decidable instances needed).
theorem MetaCTerm.toLeanSource_empty_round_trip :
MetaCTerm.fromLeanSource? MetaCTerm.empty.toLeanSource = some MetaCTerm.empty := by
decide
theorem MetaClassifier.toLeanSource_always_round_trip :
MetaClassifier.fromLeanSource? MetaClassifier.always.toLeanSource =
some MetaClassifier.always := by
decide
theorem MetaClassifier.toLeanSource_never_round_trip :
MetaClassifier.fromLeanSource? MetaClassifier.never.toLeanSource =
some MetaClassifier.never := by
decide
theorem MetaArtifact.toLeanSource_empty_round_trip_render :
(MetaArtifact.fromLeanSource? MetaArtifact.empty.toLeanSource).map
MetaArtifact.toLeanSource = some MetaArtifact.empty.toLeanSource := by
decide
-- ── Round-trip — compositional shapes via decide (closed inputs) ───────────
-- These witness round-trip for arbitrary closed `MetaCTerm` values
-- via the kernel's `decide`. Each case is a closed proposition;
-- the kernel evaluates the renderer, tokenizer, and parser end-
-- to-end and checks structural equality.
theorem MetaCTerm.toLeanSource_ident_anonymous_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.ident Lean.Name.anonymous).toLeanSource =
some (MetaCTerm.ident Lean.Name.anonymous) := by
decide
theorem MetaCTerm.toLeanSource_app_empty_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.app .empty .empty).toLeanSource =
some (MetaCTerm.app .empty .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_lam_x_empty_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.lam "x" .empty).toLeanSource =
some (MetaCTerm.lam "x" .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_comp_round_trip :
MetaCTerm.fromLeanSource?
(MetaCTerm.comp "i" .empty .always .empty .empty).toLeanSource =
some (MetaCTerm.comp "i" .empty .always .empty .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_transp_round_trip :
MetaCTerm.fromLeanSource?
(MetaCTerm.transp "i" .empty .never .empty).toLeanSource =
some (MetaCTerm.transp "i" .empty .never .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_plam_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.plam "i" .empty).toLeanSource =
some (MetaCTerm.plam "i" .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_sym_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.sym "x").toLeanSource =
some (MetaCTerm.sym "x") := by
decide
-- Compositional witness: a `.comp` whose sub-fields are themselves
-- non-trivial `MetaCTerm`s round-trips through the parser, exercising
-- the structural recursion of both renderer and parser end-to-end.
set_option maxRecDepth 16000 in
theorem MetaCTerm.toLeanSource_nested_round_trip :
MetaCTerm.fromLeanSource?
(MetaCTerm.comp "i" (.ident `Univ) .always
(.lam "x" (.sym "y")) (.sym "z")).toLeanSource =
some (MetaCTerm.comp "i" (.ident `Univ) .always
(.lam "x" (.sym "y")) (.sym "z")) := by
decide
set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_meet_round_trip :
MetaClassifier.fromLeanSource?
(MetaClassifier.meet .always .never).toLeanSource =
some (MetaClassifier.meet .always .never) := by
decide
set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_atDecl_round_trip :
MetaClassifier.fromLeanSource?
(MetaClassifier.atDecl `Foo).toLeanSource =
some (MetaClassifier.atDecl `Foo) := by
decide
-- 3-level deep name regression test: `eq0.i` (the FaceFormula
-- `eq0` encoding) round-trips. Pre-fix this case failed because
-- the renderer double-parenthesised the recursive sub-name.
set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_atDecl_three_level_round_trip :
MetaClassifier.fromLeanSource?
(MetaClassifier.atDecl
(Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")).toLeanSource =
some (MetaClassifier.atDecl
(Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")) := by
decide
end Infoductor