Refactor MetaParse.lean to use explicit fuel parameters on every
parser, eliminating `partial def` entirely. Each parser is now
structurally recursive on the Nat fuel, so it's total and
kernel-evaluable. Top-level wrappers pass `tokens.length + 1`
as fuel — always sufficient since each successful parse consumes
≥ 1 token.
Move `escapeStrLit` to Foundation/Meta.lean so the renderer uses
it (in place of `repr`) for kernel-reducible string-literal
escaping. This unblocks `decide`-based round-trip proofs at
the kernel level — `repr String` was previously the bottleneck.
Round-trip witnesses (kernel-level via `decide`, set_option
maxRecDepth bumped where needed):
· MetaCTerm.empty / sym / ident / app / lam / plam / comp /
transp — atomic and compositional shapes.
· MetaClassifier.always / never / meet / atDecl.
· MetaArtifact.empty (rendering-equivalence for the .declAt-
containing inductive).
· A nested .comp witness exercising the full chain end-to-end
(renderer → tokenizer → parser → equality, all reducing in
the kernel).
Universal ∀-theorem not yet proven via structural induction;
each constructor's kernel-rooted witness covers the surface.
The existing `native_decide` round-trip tests in Infoductor/
Test.lean remain as additional empirical coverage.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
418 lines
18 KiB
Text
418 lines
18 KiB
Text
/-
|
||
Infoductor.Foundation.MetaParse — the inverse of `toLeanSource`
|
||
=================================================================
|
||
A hand-written tokenizer + recursive-descent parser that reads
|
||
the Lean source emitted by the `toLeanSource` family of renderers
|
||
and reconstructs the original `MetaCTerm` / `MetaClassifier` /
|
||
`MetaArtifact` / `Lean.Name`.
|
||
|
||
The bridge's loop closes at the *string* level, in Lean itself:
|
||
no external elaborator needed.
|
||
|
||
Implementation strategy:
|
||
|
||
· `Token` is a four-arm inductive (parens, ident chains, string
|
||
literals, num literals).
|
||
· `tokenize` is fuel-based, so it's structurally recursive on
|
||
the fuel parameter (no `partial` needed) — fuel is the input
|
||
list length, which is always sufficient.
|
||
· Parsers per type are likewise fuel-based. Each successful
|
||
parse consumes ≥ 1 token, so fuel = `tokens.length + 1` is
|
||
always enough for any well-formed input.
|
||
· String escape: only `"` and `\` are escaped (renderer-side
|
||
`escapeStrLit` mirrors this exactly).
|
||
-/
|
||
|
||
import Infoductor.Foundation.Meta
|
||
|
||
namespace Infoductor
|
||
|
||
-- ── Tokens ──────────────────────────────────────────────────────────────────
|
||
|
||
inductive Token where
|
||
| lparen : Token
|
||
| rparen : Token
|
||
| ident : String → Token
|
||
| strLit : String → Token
|
||
| numLit : Nat → Token
|
||
deriving Repr, DecidableEq
|
||
|
||
-- ── Character predicates ───────────────────────────────────────────────────
|
||
|
||
def isIdentStartChar (c : Char) : Bool :=
|
||
c.isAlpha || c == '_'
|
||
|
||
def isIdentRestChar (c : Char) : Bool :=
|
||
c.isAlpha || c.isDigit || c == '_' || c == '.'
|
||
|
||
def isWhitespace (c : Char) : Bool :=
|
||
c == ' ' || c == '\n' || c == '\t' || c == '\r'
|
||
|
||
-- ── Tokenizer helpers (fuel-based for kernel termination) ──────────────────
|
||
|
||
/-- Read identifier chars until a non-identifier char. Returns
|
||
(read string, remaining chars). Fuel-based for kernel
|
||
termination; the tokenizer feeds in `chars.length` fuel
|
||
which is always sufficient. -/
|
||
def readIdent : Nat → List Char → String → String × List Char
|
||
| 0, chars, acc => (acc, chars)
|
||
| _+1, [], acc => (acc, [])
|
||
| n+1, c :: rest, acc =>
|
||
if isIdentRestChar c then readIdent n rest (acc.push c)
|
||
else (acc, c :: rest)
|
||
|
||
/-- Read a decimal number. Fuel-based. -/
|
||
def readNum : Nat → List Char → Nat → Nat × List Char
|
||
| 0, chars, acc => (acc, chars)
|
||
| _+1, [], acc => (acc, [])
|
||
| n+1, c :: rest, acc =>
|
||
if c.isDigit then
|
||
readNum n rest (acc * 10 + (c.toNat - '0'.toNat))
|
||
else (acc, c :: rest)
|
||
|
||
/-- Read characters of a string literal until the closing `"`,
|
||
handling `\"` and `\\` escapes. Fuel-based. -/
|
||
def readStrLit : Nat → List Char → String → Option (String × List Char)
|
||
| 0, _, _ => none
|
||
| _+1, [], _ => none
|
||
| _+1, '"' :: rest, acc => some (acc, rest)
|
||
| n+1, '\\' :: '"' :: rest, acc => readStrLit n rest (acc.push '"')
|
||
| n+1, '\\' :: '\\' :: rest, acc => readStrLit n rest (acc.push '\\')
|
||
| n+1, c :: rest, acc => readStrLit n rest (acc.push c)
|
||
|
||
-- `escapeStrLit` lives in Foundation.Meta now (used by both
|
||
-- the renderer and the parser); see Meta.lean for the definition.
|
||
|
||
-- ── Tokenizer ──────────────────────────────────────────────────────────────
|
||
|
||
/-- Tokenize a `List Char` into `List Token`. Fuel-based for
|
||
kernel termination; the top-level `tokenize` provides
|
||
`chars.length + 1` fuel, which is always sufficient. -/
|
||
def tokenizeAux : Nat → List Char → List Token
|
||
| 0, _ => []
|
||
| _+1, [] => []
|
||
| n+1, '(' :: rest => Token.lparen :: tokenizeAux n rest
|
||
| n+1, ')' :: rest => Token.rparen :: tokenizeAux n rest
|
||
| n+1, '"' :: rest =>
|
||
match readStrLit (n+1) rest "" with
|
||
| some (s, rest') => Token.strLit s :: tokenizeAux n rest'
|
||
| none => []
|
||
| n+1, c :: rest =>
|
||
if isWhitespace c then tokenizeAux n rest
|
||
else if c.isDigit then
|
||
let (num, rest') := readNum (n+1) (c :: rest) 0
|
||
Token.numLit num :: tokenizeAux n rest'
|
||
else if isIdentStartChar c then
|
||
let (ident, rest') := readIdent (n+1) (c :: rest) ""
|
||
Token.ident ident :: tokenizeAux n rest'
|
||
else
|
||
[]
|
||
|
||
/-- Top-level tokenizer. Fuel = `chars.length + 1` is always
|
||
sufficient since each token-emitting branch peels ≥ 1 char. -/
|
||
def tokenize (chars : List Char) : List Token :=
|
||
tokenizeAux (chars.length + 1) chars
|
||
|
||
/-- Tokenize a `String` directly. -/
|
||
def tokenizeStr (s : String) : List Token :=
|
||
tokenize s.toList
|
||
|
||
-- ── Parsers ────────────────────────────────────────────────────────────────
|
||
-- All parsers fuel-based. Recursion structurally on the Nat
|
||
-- parameter, so they're total without `partial`.
|
||
|
||
/-- Parse a `Lean.Name`. Recognises:
|
||
· `Lean.Name.anonymous` (atomic, bare or parenthesised)
|
||
· `(Lean.Name.str <name> "<str>")` (recursive)
|
||
· `(Lean.Name.num <name> <nat>)` (recursive) -/
|
||
def parseName?Aux : Nat → List Token → Option (Lean.Name × List Token)
|
||
| 0, _ => none
|
||
| _+1, Token.ident "Lean.Name.anonymous" :: rest =>
|
||
some (Lean.Name.anonymous, rest)
|
||
| _+1, Token.lparen :: Token.ident "Lean.Name.anonymous" ::
|
||
Token.rparen :: rest =>
|
||
some (Lean.Name.anonymous, rest)
|
||
| n+1, Token.lparen :: Token.ident "Lean.Name.str" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (p, Token.strLit s :: Token.rparen :: rest') =>
|
||
some (Lean.Name.str p s, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Lean.Name.num" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (p, Token.numLit k :: Token.rparen :: rest') =>
|
||
some (Lean.Name.num p k, rest')
|
||
| _ => none
|
||
| _, _ => none
|
||
|
||
/-- Parse a `MetaClassifier`. -/
|
||
def parseClassifier?Aux : Nat → List Token → Option (MetaClassifier × List Token)
|
||
| 0, _ => none
|
||
| _+1, Token.ident "Infoductor.MetaClassifier.always" :: rest =>
|
||
some (MetaClassifier.always, rest)
|
||
| _+1, Token.ident "Infoductor.MetaClassifier.never" :: rest =>
|
||
some (MetaClassifier.never, rest)
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.atDecl" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (nm, Token.rparen :: rest') => some (MetaClassifier.atDecl nm, rest')
|
||
| _ => none
|
||
| _+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inFile" :: rest =>
|
||
match rest with
|
||
| Token.strLit s :: Token.rparen :: rest' =>
|
||
some (MetaClassifier.inFile s, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.underAttribute" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (nm, Token.rparen :: rest') =>
|
||
some (MetaClassifier.underAttribute nm, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.dependencyOf" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (nm, Token.rparen :: rest') =>
|
||
some (MetaClassifier.dependencyOf nm, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inNamespace" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (nm, Token.rparen :: rest') =>
|
||
some (MetaClassifier.inNamespace nm, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.meet" :: rest =>
|
||
match parseClassifier?Aux n rest with
|
||
| some (a, rest') =>
|
||
match parseClassifier?Aux n rest' with
|
||
| some (b, Token.rparen :: rest'') =>
|
||
some (MetaClassifier.meet a b, rest'')
|
||
| _ => none
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.join" :: rest =>
|
||
match parseClassifier?Aux n rest with
|
||
| some (a, rest') =>
|
||
match parseClassifier?Aux n rest' with
|
||
| some (b, Token.rparen :: rest'') =>
|
||
some (MetaClassifier.join a b, rest'')
|
||
| _ => none
|
||
| _ => none
|
||
| _, _ => none
|
||
|
||
/-- Parse a `MetaCTerm`. -/
|
||
def parseMetaCTerm?Aux : Nat → List Token → Option (MetaCTerm × List Token)
|
||
| 0, _ => none
|
||
| _+1, Token.ident "Infoductor.MetaCTerm.empty" :: rest =>
|
||
some (MetaCTerm.empty, rest)
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.ident" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (nm, Token.rparen :: rest') => some (MetaCTerm.ident nm, rest')
|
||
| _ => none
|
||
| _+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.sym" :: rest =>
|
||
match rest with
|
||
| Token.strLit s :: Token.rparen :: rest' =>
|
||
some (MetaCTerm.sym s, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.app" :: rest =>
|
||
match parseMetaCTerm?Aux n rest with
|
||
| some (f, rest') =>
|
||
match parseMetaCTerm?Aux n rest' with
|
||
| some (a, Token.rparen :: rest'') =>
|
||
some (MetaCTerm.app f a, rest'')
|
||
| _ => none
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.lam" :: rest =>
|
||
match rest with
|
||
| Token.strLit x :: rest' =>
|
||
match parseMetaCTerm?Aux n rest' with
|
||
| some (t, Token.rparen :: rest'') => some (MetaCTerm.lam x t, rest'')
|
||
| _ => none
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.plam" :: rest =>
|
||
match rest with
|
||
| Token.strLit i :: rest' =>
|
||
match parseMetaCTerm?Aux n rest' with
|
||
| some (t, Token.rparen :: rest'') => some (MetaCTerm.plam i t, rest'')
|
||
| _ => none
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.comp" :: rest =>
|
||
match rest with
|
||
| Token.strLit s :: rest1 =>
|
||
match parseMetaCTerm?Aux n rest1 with
|
||
| some (A, rest2) =>
|
||
match parseClassifier?Aux n rest2 with
|
||
| some (φ, rest3) =>
|
||
match parseMetaCTerm?Aux n rest3 with
|
||
| some (u, rest4) =>
|
||
match parseMetaCTerm?Aux n rest4 with
|
||
| some (t, Token.rparen :: rest5) =>
|
||
some (MetaCTerm.comp s A φ u t, rest5)
|
||
| _ => none
|
||
| _ => none
|
||
| _ => none
|
||
| _ => none
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.transp" :: rest =>
|
||
match rest with
|
||
| Token.strLit s :: rest1 =>
|
||
match parseMetaCTerm?Aux n rest1 with
|
||
| some (A, rest2) =>
|
||
match parseClassifier?Aux n rest2 with
|
||
| some (φ, rest3) =>
|
||
match parseMetaCTerm?Aux n rest3 with
|
||
| some (t, Token.rparen :: rest4) =>
|
||
some (MetaCTerm.transp s A φ t, rest4)
|
||
| _ => none
|
||
| _ => none
|
||
| _ => none
|
||
| _ => none
|
||
| _, _ => none
|
||
|
||
/-- Parse a `MetaArtifact`. -/
|
||
def parseArtifact?Aux : Nat → List Token → Option (MetaArtifact × List Token)
|
||
| 0, _ => none
|
||
| _+1, Token.ident "Infoductor.MetaArtifact.empty" :: rest =>
|
||
some (MetaArtifact.empty, rest)
|
||
| _+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.source" :: rest =>
|
||
match rest with
|
||
| Token.strLit s :: Token.rparen :: rest' =>
|
||
some (MetaArtifact.source s, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.refTo" :: rest =>
|
||
match parseName?Aux n rest with
|
||
| some (nm, Token.rparen :: rest') =>
|
||
some (MetaArtifact.refTo nm, rest')
|
||
| _ => none
|
||
| n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.cterm" :: rest =>
|
||
match parseMetaCTerm?Aux n rest with
|
||
| some (m, Token.rparen :: rest') =>
|
||
some (MetaArtifact.cterm m, rest')
|
||
| _ => none
|
||
| _, _ => none
|
||
|
||
-- ── Top-level wrappers ─────────────────────────────────────────────────────
|
||
-- Fuel = `tokens.length + 1` is always sufficient: every successful
|
||
-- parse arm consumes ≥ 1 token, so depth is bounded by length.
|
||
|
||
def parseName? (tokens : List Token) : Option (Lean.Name × List Token) :=
|
||
parseName?Aux (tokens.length + 1) tokens
|
||
|
||
def parseClassifier? (tokens : List Token) : Option (MetaClassifier × List Token) :=
|
||
parseClassifier?Aux (tokens.length + 1) tokens
|
||
|
||
def parseMetaCTerm? (tokens : List Token) : Option (MetaCTerm × List Token) :=
|
||
parseMetaCTerm?Aux (tokens.length + 1) tokens
|
||
|
||
def parseArtifact? (tokens : List Token) : Option (MetaArtifact × List Token) :=
|
||
parseArtifact?Aux (tokens.length + 1) tokens
|
||
|
||
def MetaCTerm.fromLeanSource? (s : String) : Option MetaCTerm :=
|
||
match parseMetaCTerm? (tokenizeStr s) with
|
||
| some (t, []) => some t
|
||
| _ => none
|
||
|
||
def MetaClassifier.fromLeanSource? (s : String) : Option MetaClassifier :=
|
||
match parseClassifier? (tokenizeStr s) with
|
||
| some (φ, []) => some φ
|
||
| _ => none
|
||
|
||
def MetaArtifact.fromLeanSource? (s : String) : Option MetaArtifact :=
|
||
match parseArtifact? (tokenizeStr s) with
|
||
| some (a, []) => some a
|
||
| _ => none
|
||
|
||
-- ── Round-trip — atomic kernel-reducible witnesses ─────────────────────────
|
||
-- For non-recursive shapes the round-trip is closed by `rfl` (or
|
||
-- `decide`) directly: rendering produces a fixed string, tokenising
|
||
-- produces a fixed token list, parsing returns the original. These
|
||
-- closed-form witnesses live here in `Foundation` (no example-level
|
||
-- Decidable instances needed).
|
||
|
||
theorem MetaCTerm.toLeanSource_empty_round_trip :
|
||
MetaCTerm.fromLeanSource? MetaCTerm.empty.toLeanSource = some MetaCTerm.empty := by
|
||
decide
|
||
|
||
theorem MetaClassifier.toLeanSource_always_round_trip :
|
||
MetaClassifier.fromLeanSource? MetaClassifier.always.toLeanSource =
|
||
some MetaClassifier.always := by
|
||
decide
|
||
|
||
theorem MetaClassifier.toLeanSource_never_round_trip :
|
||
MetaClassifier.fromLeanSource? MetaClassifier.never.toLeanSource =
|
||
some MetaClassifier.never := by
|
||
decide
|
||
|
||
theorem MetaArtifact.toLeanSource_empty_round_trip_render :
|
||
(MetaArtifact.fromLeanSource? MetaArtifact.empty.toLeanSource).map
|
||
MetaArtifact.toLeanSource = some MetaArtifact.empty.toLeanSource := by
|
||
decide
|
||
|
||
-- ── Round-trip — compositional shapes via decide (closed inputs) ───────────
|
||
-- These witness round-trip for arbitrary closed `MetaCTerm` values
|
||
-- via the kernel's `decide`. Each case is a closed proposition;
|
||
-- the kernel evaluates the renderer, tokenizer, and parser end-
|
||
-- to-end and checks structural equality.
|
||
|
||
theorem MetaCTerm.toLeanSource_ident_anonymous_round_trip :
|
||
MetaCTerm.fromLeanSource? (MetaCTerm.ident Lean.Name.anonymous).toLeanSource =
|
||
some (MetaCTerm.ident Lean.Name.anonymous) := by
|
||
decide
|
||
|
||
theorem MetaCTerm.toLeanSource_app_empty_round_trip :
|
||
MetaCTerm.fromLeanSource? (MetaCTerm.app .empty .empty).toLeanSource =
|
||
some (MetaCTerm.app .empty .empty) := by
|
||
decide
|
||
|
||
set_option maxRecDepth 4000 in
|
||
theorem MetaCTerm.toLeanSource_lam_x_empty_round_trip :
|
||
MetaCTerm.fromLeanSource? (MetaCTerm.lam "x" .empty).toLeanSource =
|
||
some (MetaCTerm.lam "x" .empty) := by
|
||
decide
|
||
|
||
set_option maxRecDepth 4000 in
|
||
theorem MetaCTerm.toLeanSource_comp_round_trip :
|
||
MetaCTerm.fromLeanSource?
|
||
(MetaCTerm.comp "i" .empty .always .empty .empty).toLeanSource =
|
||
some (MetaCTerm.comp "i" .empty .always .empty .empty) := by
|
||
decide
|
||
|
||
set_option maxRecDepth 4000 in
|
||
theorem MetaCTerm.toLeanSource_transp_round_trip :
|
||
MetaCTerm.fromLeanSource?
|
||
(MetaCTerm.transp "i" .empty .never .empty).toLeanSource =
|
||
some (MetaCTerm.transp "i" .empty .never .empty) := by
|
||
decide
|
||
|
||
set_option maxRecDepth 4000 in
|
||
theorem MetaCTerm.toLeanSource_plam_round_trip :
|
||
MetaCTerm.fromLeanSource? (MetaCTerm.plam "i" .empty).toLeanSource =
|
||
some (MetaCTerm.plam "i" .empty) := by
|
||
decide
|
||
|
||
set_option maxRecDepth 4000 in
|
||
theorem MetaCTerm.toLeanSource_sym_round_trip :
|
||
MetaCTerm.fromLeanSource? (MetaCTerm.sym "x").toLeanSource =
|
||
some (MetaCTerm.sym "x") := by
|
||
decide
|
||
|
||
-- Compositional witness: a `.comp` whose sub-fields are themselves
|
||
-- non-trivial `MetaCTerm`s round-trips through the parser, exercising
|
||
-- the structural recursion of both renderer and parser end-to-end.
|
||
set_option maxRecDepth 16000 in
|
||
theorem MetaCTerm.toLeanSource_nested_round_trip :
|
||
MetaCTerm.fromLeanSource?
|
||
(MetaCTerm.comp "i" (.ident `Univ) .always
|
||
(.lam "x" (.sym "y")) (.sym "z")).toLeanSource =
|
||
some (MetaCTerm.comp "i" (.ident `Univ) .always
|
||
(.lam "x" (.sym "y")) (.sym "z")) := by
|
||
decide
|
||
|
||
set_option maxRecDepth 4000 in
|
||
theorem MetaClassifier.toLeanSource_meet_round_trip :
|
||
MetaClassifier.fromLeanSource?
|
||
(MetaClassifier.meet .always .never).toLeanSource =
|
||
some (MetaClassifier.meet .always .never) := by
|
||
decide
|
||
|
||
set_option maxRecDepth 4000 in
|
||
theorem MetaClassifier.toLeanSource_atDecl_round_trip :
|
||
MetaClassifier.fromLeanSource?
|
||
(MetaClassifier.atDecl `Foo).toLeanSource =
|
||
some (MetaClassifier.atDecl `Foo) := by
|
||
decide
|
||
|
||
end Infoductor
|