infoductor/Infoductor/Foundation/MetaParse.lean
Maximus Gorog 6b9ac691cb Phase 3 foundation lemmas; full universal documented
Adds three foundation lemmas about `tokenize`:
  · tokenize_lparen_cons : tokenize ('(' :: rest) = lparen :: tokenize rest
  · tokenize_rparen_cons : tokenize (')' :: rest) = rparen :: tokenize rest
  · tokenize_space_cons  : tokenize (' ' :: rest) = tokenize rest

These are the trivial cases — pure unfolding of `tokenizeAux` on
single-char-token branches.

The full Phase 3 (`tokenize (toLeanSource v).toList = toTokens v`)
requires four further substantial lemmas:
  · readIdent_split  : reading an ident sequence followed by a
                       non-ident-rest char (or end of input) yields
                       exactly the accumulated string.
  · readStrLit_split : reading an escapeStrLit-encoded body until
                       the closing `"` recovers the original string.
  · tokenize_app_clean: tokenize distributes over a concatenation
                       where the prefix ends "cleanly" (rparen,
                       whitespace, or strLit close).
  · tokenize_render_X: induction over each meta-mirror type using
                       the above plus IH on sub-values.

Each is multi-page Lean reasoning about `String`/`List Char`/
`readIdent`/`readStrLit` distribution.  The proof sketches are
documented inline.

The token-level universal (already proven) plus closed-instance
`decide` tests cover the round-trip operationally.  Adding the
Phase 3 universal would let us state
  ∀ t, fromLeanSource? (toLeanSource t) = some t
without any closed-instance restrictions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 13:01:38 -06:00

924 lines
40 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Infoductor.Foundation.MetaParse — the inverse of `toLeanSource`
=================================================================
A hand-written tokenizer + recursive-descent parser that reads
the Lean source emitted by the `toLeanSource` family of renderers
and reconstructs the original `MetaCTerm` / `MetaClassifier` /
`MetaArtifact` / `Lean.Name`.
The bridge's loop closes at the *string* level, in Lean itself:
no external elaborator needed.
Implementation strategy:
· `Token` is a four-arm inductive (parens, ident chains, string
literals, num literals).
· `tokenize` is fuel-based, so it's structurally recursive on
the fuel parameter (no `partial` needed) — fuel is the input
list length, which is always sufficient.
· Parsers per type are likewise fuel-based. Each successful
parse consumes ≥ 1 token, so fuel = `tokens.length + 1` is
always enough for any well-formed input.
· String escape: only `"` and `\` are escaped (renderer-side
`escapeStrLit` mirrors this exactly).
-/
import Infoductor.Foundation.Meta
namespace Infoductor
-- ── Tokens ──────────────────────────────────────────────────────────────────
inductive Token where
| lparen : Token
| rparen : Token
| ident : String → Token
| strLit : String → Token
| numLit : Nat → Token
deriving Repr, DecidableEq
-- ── Character predicates ───────────────────────────────────────────────────
def isIdentStartChar (c : Char) : Bool :=
c.isAlpha || c == '_'
def isIdentRestChar (c : Char) : Bool :=
c.isAlpha || c.isDigit || c == '_' || c == '.'
def isWhitespace (c : Char) : Bool :=
c == ' ' || c == '\n' || c == '\t' || c == '\r'
-- ── Tokenizer helpers (fuel-based for kernel termination) ──────────────────
/-- Read identifier chars until a non-identifier char. Returns
(read string, remaining chars). Fuel-based for kernel
termination; the tokenizer feeds in `chars.length` fuel
which is always sufficient. -/
def readIdent : Nat → List Char → String → String × List Char
| 0, chars, acc => (acc, chars)
| _+1, [], acc => (acc, [])
| n+1, c :: rest, acc =>
if isIdentRestChar c then readIdent n rest (acc.push c)
else (acc, c :: rest)
/-- Read a decimal number. Fuel-based. -/
def readNum : Nat → List Char → Nat → Nat × List Char
| 0, chars, acc => (acc, chars)
| _+1, [], acc => (acc, [])
| n+1, c :: rest, acc =>
if c.isDigit then
readNum n rest (acc * 10 + (c.toNat - '0'.toNat))
else (acc, c :: rest)
/-- Read characters of a string literal until the closing `"`,
handling `\"` and `\\` escapes. Fuel-based. -/
def readStrLit : Nat → List Char → String → Option (String × List Char)
| 0, _, _ => none
| _+1, [], _ => none
| _+1, '"' :: rest, acc => some (acc, rest)
| n+1, '\\' :: '"' :: rest, acc => readStrLit n rest (acc.push '"')
| n+1, '\\' :: '\\' :: rest, acc => readStrLit n rest (acc.push '\\')
| n+1, c :: rest, acc => readStrLit n rest (acc.push c)
-- `escapeStrLit` lives in Foundation.Meta now (used by both
-- the renderer and the parser); see Meta.lean for the definition.
-- ── Tokenizer ──────────────────────────────────────────────────────────────
/-- Tokenize a `List Char` into `List Token`. Fuel-based for
kernel termination; the top-level `tokenize` provides
`chars.length + 1` fuel, which is always sufficient. -/
def tokenizeAux : Nat → List Char → List Token
| 0, _ => []
| _+1, [] => []
| n+1, '(' :: rest => Token.lparen :: tokenizeAux n rest
| n+1, ')' :: rest => Token.rparen :: tokenizeAux n rest
| n+1, '"' :: rest =>
match readStrLit (n+1) rest "" with
| some (s, rest') => Token.strLit s :: tokenizeAux n rest'
| none => []
| n+1, c :: rest =>
if isWhitespace c then tokenizeAux n rest
else if c.isDigit then
let (num, rest') := readNum (n+1) (c :: rest) 0
Token.numLit num :: tokenizeAux n rest'
else if isIdentStartChar c then
let (ident, rest') := readIdent (n+1) (c :: rest) ""
Token.ident ident :: tokenizeAux n rest'
else
[]
/-- Top-level tokenizer. Fuel = `chars.length + 1` is always
sufficient since each token-emitting branch peels ≥ 1 char. -/
def tokenize (chars : List Char) : List Token :=
tokenizeAux (chars.length + 1) chars
/-- Tokenize a `String` directly. -/
def tokenizeStr (s : String) : List Token :=
tokenize s.toList
-- ── Parsers ────────────────────────────────────────────────────────────────
-- All parsers fuel-based. Recursion structurally on the Nat
-- parameter, so they're total without `partial`.
/-- Parse a `Lean.Name`. Recognises:
· `Lean.Name.anonymous` (atomic)
· `(Lean.Name.str <name> "<str>")` (recursive)
· `(Lean.Name.num <name> <nat>)` (recursive)
The renderer no longer wraps recursive sub-name calls in
extra parens (post-fix), so the parser handles only the
canonical bare-atomic + constructor-app forms. -/
def parseName?Aux : Nat → List Token → Option (Lean.Name × List Token)
| 0, _ => none
| _+1, Token.ident "Lean.Name.anonymous" :: rest =>
some (Lean.Name.anonymous, rest)
| n+1, Token.lparen :: Token.ident "Lean.Name.str" :: rest =>
match parseName?Aux n rest with
| some (p, Token.strLit s :: Token.rparen :: rest') =>
some (Lean.Name.str p s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Lean.Name.num" :: rest =>
match parseName?Aux n rest with
| some (p, Token.numLit k :: Token.rparen :: rest') =>
some (Lean.Name.num p k, rest')
| _ => none
| _, _ => none
/-- Parse a `MetaClassifier`. -/
def parseClassifier?Aux : Nat → List Token → Option (MetaClassifier × List Token)
| 0, _ => none
| _+1, Token.ident "Infoductor.MetaClassifier.always" :: rest =>
some (MetaClassifier.always, rest)
| _+1, Token.ident "Infoductor.MetaClassifier.never" :: rest =>
some (MetaClassifier.never, rest)
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.atDecl" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') => some (MetaClassifier.atDecl nm, rest')
| _ => none
| _+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inFile" :: rest =>
match rest with
| Token.strLit s :: Token.rparen :: rest' =>
some (MetaClassifier.inFile s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.underAttribute" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaClassifier.underAttribute nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.dependencyOf" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaClassifier.dependencyOf nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inNamespace" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaClassifier.inNamespace nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.meet" :: rest =>
match parseClassifier?Aux n rest with
| some (a, rest') =>
match parseClassifier?Aux n rest' with
| some (b, Token.rparen :: rest'') =>
some (MetaClassifier.meet a b, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.join" :: rest =>
match parseClassifier?Aux n rest with
| some (a, rest') =>
match parseClassifier?Aux n rest' with
| some (b, Token.rparen :: rest'') =>
some (MetaClassifier.join a b, rest'')
| _ => none
| _ => none
| _, _ => none
/-- Parse a `MetaCTerm`. -/
def parseMetaCTerm?Aux : Nat → List Token → Option (MetaCTerm × List Token)
| 0, _ => none
| _+1, Token.ident "Infoductor.MetaCTerm.empty" :: rest =>
some (MetaCTerm.empty, rest)
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.ident" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') => some (MetaCTerm.ident nm, rest')
| _ => none
| _+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.sym" :: rest =>
match rest with
| Token.strLit s :: Token.rparen :: rest' =>
some (MetaCTerm.sym s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.app" :: rest =>
match parseMetaCTerm?Aux n rest with
| some (f, rest') =>
match parseMetaCTerm?Aux n rest' with
| some (a, Token.rparen :: rest'') =>
some (MetaCTerm.app f a, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.lam" :: rest =>
match rest with
| Token.strLit x :: rest' =>
match parseMetaCTerm?Aux n rest' with
| some (t, Token.rparen :: rest'') => some (MetaCTerm.lam x t, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.plam" :: rest =>
match rest with
| Token.strLit i :: rest' =>
match parseMetaCTerm?Aux n rest' with
| some (t, Token.rparen :: rest'') => some (MetaCTerm.plam i t, rest'')
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.comp" :: rest =>
match rest with
| Token.strLit s :: rest1 =>
match parseMetaCTerm?Aux n rest1 with
| some (A, rest2) =>
match parseClassifier?Aux n rest2 with
| some (φ, rest3) =>
match parseMetaCTerm?Aux n rest3 with
| some (u, rest4) =>
match parseMetaCTerm?Aux n rest4 with
| some (t, Token.rparen :: rest5) =>
some (MetaCTerm.comp s A φ u t, rest5)
| _ => none
| _ => none
| _ => none
| _ => none
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.transp" :: rest =>
match rest with
| Token.strLit s :: rest1 =>
match parseMetaCTerm?Aux n rest1 with
| some (A, rest2) =>
match parseClassifier?Aux n rest2 with
| some (φ, rest3) =>
match parseMetaCTerm?Aux n rest3 with
| some (t, Token.rparen :: rest4) =>
some (MetaCTerm.transp s A φ t, rest4)
| _ => none
| _ => none
| _ => none
| _ => none
| _, _ => none
/-- Parse a `MetaArtifact`. -/
def parseArtifact?Aux : Nat → List Token → Option (MetaArtifact × List Token)
| 0, _ => none
| _+1, Token.ident "Infoductor.MetaArtifact.empty" :: rest =>
some (MetaArtifact.empty, rest)
| _+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.source" :: rest =>
match rest with
| Token.strLit s :: Token.rparen :: rest' =>
some (MetaArtifact.source s, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.refTo" :: rest =>
match parseName?Aux n rest with
| some (nm, Token.rparen :: rest') =>
some (MetaArtifact.refTo nm, rest')
| _ => none
| n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.cterm" :: rest =>
match parseMetaCTerm?Aux n rest with
| some (m, Token.rparen :: rest') =>
some (MetaArtifact.cterm m, rest')
| _ => none
| _, _ => none
-- ── Top-level wrappers ─────────────────────────────────────────────────────
-- Fuel = `tokens.length + 1` is always sufficient: every successful
-- parse arm consumes ≥ 1 token, so depth is bounded by length.
def parseName? (tokens : List Token) : Option (Lean.Name × List Token) :=
parseName?Aux (tokens.length + 1) tokens
def parseClassifier? (tokens : List Token) : Option (MetaClassifier × List Token) :=
parseClassifier?Aux (tokens.length + 1) tokens
def parseMetaCTerm? (tokens : List Token) : Option (MetaCTerm × List Token) :=
parseMetaCTerm?Aux (tokens.length + 1) tokens
def parseArtifact? (tokens : List Token) : Option (MetaArtifact × List Token) :=
parseArtifact?Aux (tokens.length + 1) tokens
def MetaCTerm.fromLeanSource? (s : String) : Option MetaCTerm :=
match parseMetaCTerm? (tokenizeStr s) with
| some (t, []) => some t
| _ => none
def MetaClassifier.fromLeanSource? (s : String) : Option MetaClassifier :=
match parseClassifier? (tokenizeStr s) with
| some (φ, []) => some φ
| _ => none
def MetaArtifact.fromLeanSource? (s : String) : Option MetaArtifact :=
match parseArtifact? (tokenizeStr s) with
| some (a, []) => some a
| _ => none
-- ── Canonical token forms — direct meta-mirror → List Token ────────────────
-- Each function mirrors the corresponding `toLeanSource` renderer
-- but produces tokens directly, bypassing the String layer. These
-- are the canonical token forms against which parser correctness
-- is proven. The relationship `tokenize ∘ toLeanSource = toTokens`
-- is established separately as a String-level lemma.
def nameToTokens : Lean.Name → List Token
| .anonymous => [Token.ident "Lean.Name.anonymous"]
| .str p s =>
[Token.lparen, Token.ident "Lean.Name.str"] ++ nameToTokens p ++
[Token.strLit s, Token.rparen]
| .num p k =>
[Token.lparen, Token.ident "Lean.Name.num"] ++ nameToTokens p ++
[Token.numLit k, Token.rparen]
def MetaClassifier.toTokens : MetaClassifier → List Token
| .always => [Token.ident "Infoductor.MetaClassifier.always"]
| .never => [Token.ident "Infoductor.MetaClassifier.never"]
| .atDecl n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.atDecl"] ++
nameToTokens n ++ [Token.rparen]
| .inFile s =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.inFile",
Token.strLit s, Token.rparen]
| .underAttribute n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.underAttribute"] ++
nameToTokens n ++ [Token.rparen]
| .dependencyOf n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.dependencyOf"] ++
nameToTokens n ++ [Token.rparen]
| .inNamespace n =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.inNamespace"] ++
nameToTokens n ++ [Token.rparen]
| .meet a b =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.meet"] ++
toTokens a ++ toTokens b ++ [Token.rparen]
| .join a b =>
[Token.lparen, Token.ident "Infoductor.MetaClassifier.join"] ++
toTokens a ++ toTokens b ++ [Token.rparen]
def MetaCTerm.toTokens : MetaCTerm → List Token
| .ident n =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.ident"] ++
nameToTokens n ++ [Token.rparen]
| .sym s =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.sym",
Token.strLit s, Token.rparen]
| .app f a =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.app"] ++
toTokens f ++ toTokens a ++ [Token.rparen]
| .lam x t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.lam",
Token.strLit x] ++ toTokens t ++ [Token.rparen]
| .plam i t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.plam",
Token.strLit i] ++ toTokens t ++ [Token.rparen]
| .comp s A φ u t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.comp",
Token.strLit s] ++ toTokens A ++ φ.toTokens ++
toTokens u ++ toTokens t ++ [Token.rparen]
| .transp s A φ t =>
[Token.lparen, Token.ident "Infoductor.MetaCTerm.transp",
Token.strLit s] ++ toTokens A ++ φ.toTokens ++
toTokens t ++ [Token.rparen]
| .empty => [Token.ident "Infoductor.MetaCTerm.empty"]
def MetaArtifact.toTokens : MetaArtifact → List Token
| .source s => [Token.lparen, Token.ident "Infoductor.MetaArtifact.source",
Token.strLit s, Token.rparen]
| .declAt _ => [] -- not source-renderable; toTokens undefined for this arm
| .cterm m => [Token.lparen, Token.ident "Infoductor.MetaArtifact.cterm"] ++
m.toTokens ++ [Token.rparen]
| .refTo n => [Token.lparen, Token.ident "Infoductor.MetaArtifact.refTo"] ++
nameToTokens n ++ [Token.rparen]
| .empty => [Token.ident "Infoductor.MetaArtifact.empty"]
-- ── Size measures (for fuel bookkeeping in proofs) ─────────────────────────
def nameDepth : Lean.Name → Nat
| .anonymous => 1
| .str p _ => nameDepth p + 1
| .num p _ => nameDepth p + 1
def classifierDepth : MetaClassifier → Nat
| .always | .never => 1
| .atDecl n => nameDepth n + 1
| .inFile _ => 2
| .underAttribute n => nameDepth n + 1
| .dependencyOf n => nameDepth n + 1
| .inNamespace n => nameDepth n + 1
| .meet a b => max (classifierDepth a) (classifierDepth b) + 1
| .join a b => max (classifierDepth a) (classifierDepth b) + 1
def cTermDepth : MetaCTerm → Nat
| .ident n => nameDepth n + 1
| .sym _ => 2
| .empty => 1
| .lam _ t | .plam _ t => cTermDepth t + 1
| .app f a => max (cTermDepth f) (cTermDepth a) + 1
| .comp _ A φ u t =>
max (max (cTermDepth A) (classifierDepth φ))
(max (cTermDepth u) (cTermDepth t)) + 1
| .transp _ A φ t =>
max (max (cTermDepth A) (classifierDepth φ)) (cTermDepth t) + 1
def artifactDepth : MetaArtifact → Nat
| .source _ => 2
| .declAt _ => 0 -- not source-encodable; round-trip excluded
| .cterm m => cTermDepth m + 1
| .refTo n => nameDepth n + 1
| .empty => 1
/-- A `MetaArtifact` is *supported* by the round-trip iff it does
not carry a `Lean.Syntax` (which cannot be source-rendered).
All other arms are supported. -/
def MetaArtifact.supported : MetaArtifact → Bool
| .declAt _ => false
| _ => true
-- ── Parser correctness on toTokens (Phase 2) ──────────────────────────────
-- Universal theorems witnessing that each parser, when fed the
-- canonical token form of a value plus arbitrary trailing tokens,
-- reconstructs the value exactly and leaves the trailing tokens
-- untouched. Provided fuel is at least the depth of the value.
-- Proofs are by structural induction on the meta-mirror; no
-- String reasoning is needed.
theorem parseName?Aux_correct :
∀ (n : Lean.Name) (rest : List Token) (fuel : Nat),
fuel ≥ nameDepth n →
parseName?Aux fuel (nameToTokens n ++ rest) = some (n, rest)
| .anonymous, rest, fuel, h => by
rcases fuel with _ | f
· simp [nameDepth] at h
· simp [nameToTokens, parseName?Aux]
| .str p s, rest, fuel, h => by
rcases fuel with _ | f
· simp [nameDepth] at h
· simp only [nameToTokens, List.append_assoc, List.cons_append,
List.nil_append, parseName?Aux]
have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega
have ih := parseName?Aux_correct p
(Token.strLit s :: Token.rparen :: rest) f hp
rw [ih]
| .num p k, rest, fuel, h => by
rcases fuel with _ | f
· simp [nameDepth] at h
· simp only [nameToTokens, List.append_assoc, List.cons_append,
List.nil_append, parseName?Aux]
have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega
have ih := parseName?Aux_correct p
(Token.numLit k :: Token.rparen :: rest) f hp
rw [ih]
theorem parseClassifier?Aux_correct :
∀ (φ : MetaClassifier) (rest : List Token) (fuel : Nat),
fuel ≥ classifierDepth φ →
parseClassifier?Aux fuel (φ.toTokens ++ rest) = some (φ, rest)
| .always, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp [MetaClassifier.toTokens, parseClassifier?Aux]
| .never, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp [MetaClassifier.toTokens, parseClassifier?Aux]
| .atDecl n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .inFile s, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp [MetaClassifier.toTokens, parseClassifier?Aux]
| .underAttribute n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by
simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .dependencyOf n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by
simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .inNamespace n, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have hn : f ≥ nameDepth n := by
simp [classifierDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .meet a b, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have ha : f ≥ classifierDepth a := by
simp [classifierDepth] at h; omega
have hb : f ≥ classifierDepth b := by
simp [classifierDepth] at h; omega
simp only [parseClassifier?Aux_correct a _ f ha,
parseClassifier?Aux_correct b _ f hb]
| .join a b, rest, fuel, h => by
rcases fuel with _ | f
· simp [classifierDepth] at h
· simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseClassifier?Aux]
have ha : f ≥ classifierDepth a := by
simp [classifierDepth] at h; omega
have hb : f ≥ classifierDepth b := by
simp [classifierDepth] at h; omega
simp only [parseClassifier?Aux_correct a _ f ha,
parseClassifier?Aux_correct b _ f hb]
theorem parseMetaCTerm?Aux_correct :
∀ (t : MetaCTerm) (rest : List Token) (fuel : Nat),
fuel ≥ cTermDepth t →
parseMetaCTerm?Aux fuel (t.toTokens ++ rest) = some (t, rest)
| .empty, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp [MetaCTerm.toTokens, parseMetaCTerm?Aux]
| .ident n, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hn : f ≥ nameDepth n := by simp [cTermDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .sym s, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp [MetaCTerm.toTokens, parseMetaCTerm?Aux]
| .app fa aa, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hf : f ≥ cTermDepth fa := by simp [cTermDepth] at h; omega
have ha : f ≥ cTermDepth aa := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct fa _ f hf,
parseMetaCTerm?Aux_correct aa _ f ha]
| .lam x t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct t _ f ht]
| .plam i t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct t _ f ht]
| .comp s A φ u t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega
have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega
have hu : f ≥ cTermDepth u := by simp [cTermDepth] at h; omega
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct A _ f hA,
parseClassifier?Aux_correct φ _ f hφ,
parseMetaCTerm?Aux_correct u _ f hu,
parseMetaCTerm?Aux_correct t _ f ht]
| .transp s A φ t, rest, fuel, h => by
rcases fuel with _ | f
· simp [cTermDepth] at h
· simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseMetaCTerm?Aux]
have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega
have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega
have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct A _ f hA,
parseClassifier?Aux_correct φ _ f hφ,
parseMetaCTerm?Aux_correct t _ f ht]
theorem parseArtifact?Aux_correct :
∀ (a : MetaArtifact) (rest : List Token) (fuel : Nat),
a.supported = true →
fuel ≥ artifactDepth a →
parseArtifact?Aux fuel (a.toTokens ++ rest) = some (a, rest)
| .empty, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp [MetaArtifact.toTokens, parseArtifact?Aux]
| .source s, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp [MetaArtifact.toTokens, parseArtifact?Aux]
| .refTo n, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseArtifact?Aux]
have hn : f ≥ nameDepth n := by simp [artifactDepth] at h; omega
rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
| .cterm m, rest, fuel, _, h => by
rcases fuel with _ | f
· simp [artifactDepth] at h
· simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append,
List.nil_append, parseArtifact?Aux]
have hm : f ≥ cTermDepth m := by simp [artifactDepth] at h; omega
simp only [parseMetaCTerm?Aux_correct m _ f hm]
| .declAt _, _, _, hsup, _ => by
simp [MetaArtifact.supported] at hsup
-- ── Token-level round-trip — the canonical universal theorems ─────────────
-- These follow directly from the `parser?Aux_correct` lemmas above
-- by setting `rest = []` and `fuel = depth + k` (any sufficient
-- fuel). They're the cleanest universal statements: the parser,
-- starting from a value's canonical token form, recovers the
-- value with no leftover tokens. No String reasoning needed.
/-- Top-level token-entry parser. Tokenisation is presentation;
this is the *algorithmic* round-trip's input. -/
def MetaCTerm.fromTokens? (tokens : List Token) : Option MetaCTerm :=
match parseMetaCTerm? tokens with
| some (t, []) => some t
| _ => none
def MetaClassifier.fromTokens? (tokens : List Token) : Option MetaClassifier :=
match parseClassifier? tokens with
| some (φ, []) => some φ
| _ => none
def Infoductor.nameFromTokens? (tokens : List Token) : Option Lean.Name :=
match parseName? tokens with
| some (n, []) => some n
| _ => none
def MetaArtifact.fromTokens? (tokens : List Token) : Option MetaArtifact :=
match parseArtifact? tokens with
| some (a, []) => some a
| _ => none
-- Length-vs-depth lemmas: each toTokens output is at least as long
-- as the depth of the value, so `tokens.length + 1` is always
-- sufficient fuel.
theorem nameToTokens_length_bound (n : Lean.Name) :
(nameToTokens n).length + 1 ≥ nameDepth n := by
induction n with
| anonymous => simp [nameToTokens, nameDepth]
| str p s ih => simp [nameToTokens, nameDepth]; omega
| num p k ih => simp [nameToTokens, nameDepth]; omega
theorem classifierToTokens_length_bound (φ : MetaClassifier) :
(φ.toTokens).length + 1 ≥ classifierDepth φ := by
induction φ with
| always => simp [MetaClassifier.toTokens, classifierDepth]
| never => simp [MetaClassifier.toTokens, classifierDepth]
| atDecl n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| inFile s => simp [MetaClassifier.toTokens, classifierDepth]
| underAttribute n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| dependencyOf n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| inNamespace n =>
simp [MetaClassifier.toTokens, classifierDepth]
have := nameToTokens_length_bound n; omega
| meet a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega
| join a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega
theorem cTermToTokens_length_bound (t : MetaCTerm) :
(t.toTokens).length + 1 ≥ cTermDepth t := by
induction t with
| empty => simp [MetaCTerm.toTokens, cTermDepth]
| ident n =>
simp [MetaCTerm.toTokens, cTermDepth]
have := nameToTokens_length_bound n; omega
| sym s => simp [MetaCTerm.toTokens, cTermDepth]
| app f a ihf iha => simp [MetaCTerm.toTokens, cTermDepth]; omega
| lam x t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega
| plam i t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega
| comp s A φ u t ihA ihu iht =>
simp [MetaCTerm.toTokens, cTermDepth]
have := classifierToTokens_length_bound φ
omega
| transp s A φ t ihA iht =>
simp [MetaCTerm.toTokens, cTermDepth]
have := classifierToTokens_length_bound φ
omega
/-- Every meta-mirror value's canonical token form parses back to
the same value. Universal — by structural induction on the
meta-mirror type, with sufficient fuel guaranteed by the
length-vs-depth lemma above. -/
theorem nameFromTokens?_round_trip (n : Lean.Name) :
Infoductor.nameFromTokens? (nameToTokens n) = some n := by
unfold Infoductor.nameFromTokens? parseName?
have h := nameToTokens_length_bound n
have := parseName?Aux_correct n [] ((nameToTokens n).length + 1) h
rw [List.append_nil] at this
rw [this]
theorem classifierFromTokens?_round_trip (φ : MetaClassifier) :
MetaClassifier.fromTokens? φ.toTokens = some φ := by
unfold MetaClassifier.fromTokens? parseClassifier?
have h := classifierToTokens_length_bound φ
have := parseClassifier?Aux_correct φ [] ((φ.toTokens).length + 1) h
rw [List.append_nil] at this
rw [this]
theorem cTermFromTokens?_round_trip (t : MetaCTerm) :
MetaCTerm.fromTokens? t.toTokens = some t := by
unfold MetaCTerm.fromTokens? parseMetaCTerm?
have h := cTermToTokens_length_bound t
have := parseMetaCTerm?Aux_correct t [] ((t.toTokens).length + 1) h
rw [List.append_nil] at this
rw [this]
theorem artifactFromTokens?_round_trip (a : MetaArtifact)
(hsup : a.supported = true) :
MetaArtifact.fromTokens? a.toTokens = some a := by
unfold MetaArtifact.fromTokens? parseArtifact?
-- artifact length bound
have hlen : (a.toTokens).length + 1 ≥ artifactDepth a := by
cases a with
| empty => simp [MetaArtifact.toTokens, artifactDepth]
| source s => simp [MetaArtifact.toTokens, artifactDepth]
| declAt _ => simp [MetaArtifact.supported] at hsup
| cterm m =>
simp [MetaArtifact.toTokens, artifactDepth]
have := cTermToTokens_length_bound m; omega
| refTo n =>
simp [MetaArtifact.toTokens, artifactDepth]
have := nameToTokens_length_bound n; omega
have := parseArtifact?Aux_correct a [] ((a.toTokens).length + 1) hsup hlen
rw [List.append_nil] at this
rw [this]
-- ── Phase 3: tokenize ∘ render = toTokens ─────────────────────────────────
-- The String-level half. Foundation lemmas about tokenize's
-- behaviour, then induction over each meta-mirror type.
/-- `tokenize` on `(` :: rest reduces to `lparen :: tokenize rest`. -/
theorem tokenize_lparen_cons (rest : List Char) :
tokenize ('(' :: rest) = Token.lparen :: tokenize rest := by
simp [tokenize, tokenizeAux]
/-- `tokenize` on `)` :: rest reduces to `rparen :: tokenize rest`. -/
theorem tokenize_rparen_cons (rest : List Char) :
tokenize (')' :: rest) = Token.rparen :: tokenize rest := by
simp [tokenize, tokenizeAux]
/-- `tokenize` skips a leading space. -/
theorem tokenize_space_cons (rest : List Char) :
tokenize (' ' :: rest) = tokenize rest := by
simp [tokenize, tokenizeAux, isWhitespace]
-- Phase 3 deferred: the full `tokenize ∘ render = toTokens`
-- universal theorem requires careful String/List reasoning about
-- `readIdent` / `readStrLit` distribution. Proof sketch:
--
-- readIdent_split : reading an ident sequence followed by a
-- non-ident-rest char (or end of input) yields
-- exactly the accumulated string.
-- readStrLit_split : reading an escapeStrLit-encoded body until
-- the closing `"` recovers the original string.
-- tokenize_app_clean: tokenize distributes over a concatenation
-- where the prefix ends "cleanly" (rparen,
-- whitespace, or strLit close).
-- tokenize_render_X: induction over each meta-mirror type using
-- the above plus IH on sub-values.
--
-- These compose to `∀ v, tokenize (toLeanSource v).toList = toTokens v`,
-- which combined with the Phase 2 token-level universal gives the
-- String-level universal `∀ v, fromLeanSource? (toLeanSource v) = some v`.
--
-- The kernel-rooted `decide`-based tests for closed instances below
-- (and in `Infoductor/Test.lean`) provide empirical coverage in
-- the meantime.
-- ── Round-trip — atomic kernel-reducible witnesses ─────────────────────────
-- For non-recursive shapes the round-trip is closed by `rfl` (or
-- `decide`) directly: rendering produces a fixed string, tokenising
-- produces a fixed token list, parsing returns the original. These
-- closed-form witnesses live here in `Foundation` (no example-level
-- Decidable instances needed).
theorem MetaCTerm.toLeanSource_empty_round_trip :
MetaCTerm.fromLeanSource? MetaCTerm.empty.toLeanSource = some MetaCTerm.empty := by
decide
theorem MetaClassifier.toLeanSource_always_round_trip :
MetaClassifier.fromLeanSource? MetaClassifier.always.toLeanSource =
some MetaClassifier.always := by
decide
theorem MetaClassifier.toLeanSource_never_round_trip :
MetaClassifier.fromLeanSource? MetaClassifier.never.toLeanSource =
some MetaClassifier.never := by
decide
theorem MetaArtifact.toLeanSource_empty_round_trip_render :
(MetaArtifact.fromLeanSource? MetaArtifact.empty.toLeanSource).map
MetaArtifact.toLeanSource = some MetaArtifact.empty.toLeanSource := by
decide
-- ── Round-trip — compositional shapes via decide (closed inputs) ───────────
-- These witness round-trip for arbitrary closed `MetaCTerm` values
-- via the kernel's `decide`. Each case is a closed proposition;
-- the kernel evaluates the renderer, tokenizer, and parser end-
-- to-end and checks structural equality.
theorem MetaCTerm.toLeanSource_ident_anonymous_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.ident Lean.Name.anonymous).toLeanSource =
some (MetaCTerm.ident Lean.Name.anonymous) := by
decide
theorem MetaCTerm.toLeanSource_app_empty_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.app .empty .empty).toLeanSource =
some (MetaCTerm.app .empty .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_lam_x_empty_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.lam "x" .empty).toLeanSource =
some (MetaCTerm.lam "x" .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_comp_round_trip :
MetaCTerm.fromLeanSource?
(MetaCTerm.comp "i" .empty .always .empty .empty).toLeanSource =
some (MetaCTerm.comp "i" .empty .always .empty .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_transp_round_trip :
MetaCTerm.fromLeanSource?
(MetaCTerm.transp "i" .empty .never .empty).toLeanSource =
some (MetaCTerm.transp "i" .empty .never .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_plam_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.plam "i" .empty).toLeanSource =
some (MetaCTerm.plam "i" .empty) := by
decide
set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_sym_round_trip :
MetaCTerm.fromLeanSource? (MetaCTerm.sym "x").toLeanSource =
some (MetaCTerm.sym "x") := by
decide
-- Compositional witness: a `.comp` whose sub-fields are themselves
-- non-trivial `MetaCTerm`s round-trips through the parser, exercising
-- the structural recursion of both renderer and parser end-to-end.
set_option maxRecDepth 16000 in
theorem MetaCTerm.toLeanSource_nested_round_trip :
MetaCTerm.fromLeanSource?
(MetaCTerm.comp "i" (.ident `Univ) .always
(.lam "x" (.sym "y")) (.sym "z")).toLeanSource =
some (MetaCTerm.comp "i" (.ident `Univ) .always
(.lam "x" (.sym "y")) (.sym "z")) := by
decide
set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_meet_round_trip :
MetaClassifier.fromLeanSource?
(MetaClassifier.meet .always .never).toLeanSource =
some (MetaClassifier.meet .always .never) := by
decide
set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_atDecl_round_trip :
MetaClassifier.fromLeanSource?
(MetaClassifier.atDecl `Foo).toLeanSource =
some (MetaClassifier.atDecl `Foo) := by
decide
-- 3-level deep name regression test: `eq0.i` (the FaceFormula
-- `eq0` encoding) round-trips. Pre-fix this case failed because
-- the renderer double-parenthesised the recursive sub-name.
set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_atDecl_three_level_round_trip :
MetaClassifier.fromLeanSource?
(MetaClassifier.atDecl
(Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")).toLeanSource =
some (MetaClassifier.atDecl
(Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")) := by
decide
end Infoductor