/- Infoductor.Foundation.MetaParse — the inverse of `toLeanSource` ================================================================= A hand-written tokenizer + recursive-descent parser that reads the Lean source emitted by the `toLeanSource` family of renderers and reconstructs the original `MetaCTerm` / `MetaClassifier` / `MetaArtifact` / `Lean.Name`. The bridge's loop closes at the *string* level, in Lean itself: no external elaborator needed. Implementation strategy: · `Token` is a four-arm inductive (parens, ident chains, string literals, num literals). · `tokenize` is fuel-based, so it's structurally recursive on the fuel parameter (no `partial` needed) — fuel is the input list length, which is always sufficient. · Parsers per type are likewise fuel-based. Each successful parse consumes ≥ 1 token, so fuel = `tokens.length + 1` is always enough for any well-formed input. · String escape: only `"` and `\` are escaped (renderer-side `escapeStrLit` mirrors this exactly). -/ import Infoductor.Foundation.Meta namespace Infoductor -- ── Tokens ────────────────────────────────────────────────────────────────── inductive Token where | lparen : Token | rparen : Token | ident : String → Token | strLit : String → Token | numLit : Nat → Token deriving Repr, DecidableEq -- ── Character predicates ─────────────────────────────────────────────────── def isIdentStartChar (c : Char) : Bool := c.isAlpha || c == '_' def isIdentRestChar (c : Char) : Bool := c.isAlpha || c.isDigit || c == '_' || c == '.' def isWhitespace (c : Char) : Bool := c == ' ' || c == '\n' || c == '\t' || c == '\r' -- ── Tokenizer helpers (fuel-based for kernel termination) ────────────────── /-- Read identifier chars until a non-identifier char. Returns (read string, remaining chars). Fuel-based for kernel termination; the tokenizer feeds in `chars.length` fuel which is always sufficient. -/ def readIdent : Nat → List Char → String → String × List Char | 0, chars, acc => (acc, chars) | _+1, [], acc => (acc, []) | n+1, c :: rest, acc => if isIdentRestChar c then readIdent n rest (acc.push c) else (acc, c :: rest) /-- Read a decimal number. Fuel-based. -/ def readNum : Nat → List Char → Nat → Nat × List Char | 0, chars, acc => (acc, chars) | _+1, [], acc => (acc, []) | n+1, c :: rest, acc => if c.isDigit then readNum n rest (acc * 10 + (c.toNat - '0'.toNat)) else (acc, c :: rest) /-- Read characters of a string literal until the closing `"`, handling `\"` and `\\` escapes. Fuel-based. -/ def readStrLit : Nat → List Char → String → Option (String × List Char) | 0, _, _ => none | _+1, [], _ => none | _+1, '"' :: rest, acc => some (acc, rest) | n+1, '\\' :: '"' :: rest, acc => readStrLit n rest (acc.push '"') | n+1, '\\' :: '\\' :: rest, acc => readStrLit n rest (acc.push '\\') | n+1, c :: rest, acc => readStrLit n rest (acc.push c) -- `escapeStrLit` lives in Foundation.Meta now (used by both -- the renderer and the parser); see Meta.lean for the definition. -- ── Tokenizer ────────────────────────────────────────────────────────────── /-- Tokenize a `List Char` into `List Token`. Fuel-based for kernel termination; the top-level `tokenize` provides `chars.length + 1` fuel, which is always sufficient. -/ def tokenizeAux : Nat → List Char → List Token | 0, _ => [] | _+1, [] => [] | n+1, '(' :: rest => Token.lparen :: tokenizeAux n rest | n+1, ')' :: rest => Token.rparen :: tokenizeAux n rest | n+1, '"' :: rest => match readStrLit (n+1) rest "" with | some (s, rest') => Token.strLit s :: tokenizeAux n rest' | none => [] | n+1, c :: rest => if isWhitespace c then tokenizeAux n rest else if c.isDigit then let (num, rest') := readNum (n+1) (c :: rest) 0 Token.numLit num :: tokenizeAux n rest' else if isIdentStartChar c then let (ident, rest') := readIdent (n+1) (c :: rest) "" Token.ident ident :: tokenizeAux n rest' else [] /-- Top-level tokenizer. Fuel = `chars.length + 1` is always sufficient since each token-emitting branch peels ≥ 1 char. -/ def tokenize (chars : List Char) : List Token := tokenizeAux (chars.length + 1) chars /-- Tokenize a `String` directly. -/ def tokenizeStr (s : String) : List Token := tokenize s.toList -- ── Parsers ──────────────────────────────────────────────────────────────── -- All parsers fuel-based. Recursion structurally on the Nat -- parameter, so they're total without `partial`. /-- Parse a `Lean.Name`. Recognises: · `Lean.Name.anonymous` (atomic) · `(Lean.Name.str "")` (recursive) · `(Lean.Name.num )` (recursive) The renderer no longer wraps recursive sub-name calls in extra parens (post-fix), so the parser handles only the canonical bare-atomic + constructor-app forms. -/ def parseName?Aux : Nat → List Token → Option (Lean.Name × List Token) | 0, _ => none | _+1, Token.ident "Lean.Name.anonymous" :: rest => some (Lean.Name.anonymous, rest) | n+1, Token.lparen :: Token.ident "Lean.Name.str" :: rest => match parseName?Aux n rest with | some (p, Token.strLit s :: Token.rparen :: rest') => some (Lean.Name.str p s, rest') | _ => none | n+1, Token.lparen :: Token.ident "Lean.Name.num" :: rest => match parseName?Aux n rest with | some (p, Token.numLit k :: Token.rparen :: rest') => some (Lean.Name.num p k, rest') | _ => none | _, _ => none /-- Parse a `MetaClassifier`. -/ def parseClassifier?Aux : Nat → List Token → Option (MetaClassifier × List Token) | 0, _ => none | _+1, Token.ident "Infoductor.MetaClassifier.always" :: rest => some (MetaClassifier.always, rest) | _+1, Token.ident "Infoductor.MetaClassifier.never" :: rest => some (MetaClassifier.never, rest) | n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.atDecl" :: rest => match parseName?Aux n rest with | some (nm, Token.rparen :: rest') => some (MetaClassifier.atDecl nm, rest') | _ => none | _+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inFile" :: rest => match rest with | Token.strLit s :: Token.rparen :: rest' => some (MetaClassifier.inFile s, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.underAttribute" :: rest => match parseName?Aux n rest with | some (nm, Token.rparen :: rest') => some (MetaClassifier.underAttribute nm, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.dependencyOf" :: rest => match parseName?Aux n rest with | some (nm, Token.rparen :: rest') => some (MetaClassifier.dependencyOf nm, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.inNamespace" :: rest => match parseName?Aux n rest with | some (nm, Token.rparen :: rest') => some (MetaClassifier.inNamespace nm, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.meet" :: rest => match parseClassifier?Aux n rest with | some (a, rest') => match parseClassifier?Aux n rest' with | some (b, Token.rparen :: rest'') => some (MetaClassifier.meet a b, rest'') | _ => none | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaClassifier.join" :: rest => match parseClassifier?Aux n rest with | some (a, rest') => match parseClassifier?Aux n rest' with | some (b, Token.rparen :: rest'') => some (MetaClassifier.join a b, rest'') | _ => none | _ => none | _, _ => none /-- Parse a `MetaCTerm`. -/ def parseMetaCTerm?Aux : Nat → List Token → Option (MetaCTerm × List Token) | 0, _ => none | _+1, Token.ident "Infoductor.MetaCTerm.empty" :: rest => some (MetaCTerm.empty, rest) | n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.ident" :: rest => match parseName?Aux n rest with | some (nm, Token.rparen :: rest') => some (MetaCTerm.ident nm, rest') | _ => none | _+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.sym" :: rest => match rest with | Token.strLit s :: Token.rparen :: rest' => some (MetaCTerm.sym s, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.app" :: rest => match parseMetaCTerm?Aux n rest with | some (f, rest') => match parseMetaCTerm?Aux n rest' with | some (a, Token.rparen :: rest'') => some (MetaCTerm.app f a, rest'') | _ => none | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.lam" :: rest => match rest with | Token.strLit x :: rest' => match parseMetaCTerm?Aux n rest' with | some (t, Token.rparen :: rest'') => some (MetaCTerm.lam x t, rest'') | _ => none | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.plam" :: rest => match rest with | Token.strLit i :: rest' => match parseMetaCTerm?Aux n rest' with | some (t, Token.rparen :: rest'') => some (MetaCTerm.plam i t, rest'') | _ => none | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.comp" :: rest => match rest with | Token.strLit s :: rest1 => match parseMetaCTerm?Aux n rest1 with | some (A, rest2) => match parseClassifier?Aux n rest2 with | some (φ, rest3) => match parseMetaCTerm?Aux n rest3 with | some (u, rest4) => match parseMetaCTerm?Aux n rest4 with | some (t, Token.rparen :: rest5) => some (MetaCTerm.comp s A φ u t, rest5) | _ => none | _ => none | _ => none | _ => none | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaCTerm.transp" :: rest => match rest with | Token.strLit s :: rest1 => match parseMetaCTerm?Aux n rest1 with | some (A, rest2) => match parseClassifier?Aux n rest2 with | some (φ, rest3) => match parseMetaCTerm?Aux n rest3 with | some (t, Token.rparen :: rest4) => some (MetaCTerm.transp s A φ t, rest4) | _ => none | _ => none | _ => none | _ => none | _, _ => none -- ── Parsers for Lean.Syntax mirror payloads ───────────────────────────────── -- Mirror the renderer in `Foundation.Meta`: each constructor of -- `Lean.Syntax`, `Lean.SourceInfo`, `Substring.Raw`, `String.Pos.Raw`, -- and `Lean.Syntax.Preresolved` is recognised by a leading-ident -- pattern. All parsers fuel-bounded for kernel termination. /-- Parse a `String.Pos.Raw`. -/ def parseStringPosRaw?Aux : Nat → List Token → Option (String.Pos.Raw × List Token) | 0, _ => none | _+1, Token.lparen :: Token.ident "String.Pos.Raw.mk" :: Token.numLit k :: Token.rparen :: rest => some ({ byteIdx := k }, rest) | _, _ => none /-- Parse a `Substring.Raw`. -/ def parseSubstringRaw?Aux : Nat → List Token → Option (Substring.Raw × List Token) | 0, _ => none | n+1, Token.lparen :: Token.ident "Substring.Raw.mk" :: Token.strLit s :: rest => match parseStringPosRaw?Aux n rest with | some (startPos, rest') => match parseStringPosRaw?Aux n rest' with | some (stopPos, Token.rparen :: rest'') => some ({ str := s, startPos := startPos, stopPos := stopPos }, rest'') | _ => none | _ => none | _, _ => none /-- Parse a `Bool` literal — `true` or `false`. Used by the `SourceInfo.synthetic` parser. -/ def parseBool?Aux : List Token → Option (Bool × List Token) | Token.ident "true" :: rest => some (true, rest) | Token.ident "false" :: rest => some (false, rest) | _ => none /-- Parse a `Lean.SourceInfo`. -/ def parseSourceInfo?Aux : Nat → List Token → Option (Lean.SourceInfo × List Token) | 0, _ => none | _+1, Token.ident "Lean.SourceInfo.none" :: rest => some (Lean.SourceInfo.none, rest) | n+1, Token.lparen :: Token.ident "Lean.SourceInfo.original" :: rest => match parseSubstringRaw?Aux n rest with | some (lead, rest1) => match parseStringPosRaw?Aux n rest1 with | some (pos, rest2) => match parseSubstringRaw?Aux n rest2 with | some (trail, rest3) => match parseStringPosRaw?Aux n rest3 with | some (endPos, Token.rparen :: rest4) => some (Lean.SourceInfo.original lead pos trail endPos, rest4) | _ => none | _ => none | _ => none | _ => none | n+1, Token.lparen :: Token.ident "Lean.SourceInfo.synthetic" :: rest => match parseStringPosRaw?Aux n rest with | some (pos, rest1) => match parseStringPosRaw?Aux n rest1 with | some (endPos, rest2) => match parseBool?Aux rest2 with | some (canonical, Token.rparen :: rest3) => some (Lean.SourceInfo.synthetic pos endPos canonical, rest3) | _ => none | _ => none | _ => none | _, _ => none /-- Parse a `List String` rendered as a `List.cons`/`List.nil` chain. -/ def parseStringList?Aux : Nat → List Token → Option (List String × List Token) | 0, _ => none | _+1, Token.ident "List.nil" :: rest => some ([], rest) | n+1, Token.lparen :: Token.ident "List.cons" :: Token.strLit s :: rest => match parseStringList?Aux n rest with | some (tail, Token.rparen :: rest') => some (s :: tail, rest') | _ => none | _, _ => none /-- Parse a `Lean.Syntax.Preresolved`. -/ def parsePreresolved?Aux : Nat → List Token → Option (Lean.Syntax.Preresolved × List Token) | 0, _ => none | n+1, Token.lparen :: Token.ident "Lean.Syntax.Preresolved.namespace" :: rest => match parseName?Aux n rest with | some (ns, Token.rparen :: rest') => some (Lean.Syntax.Preresolved.namespace ns, rest') | _ => none | n+1, Token.lparen :: Token.ident "Lean.Syntax.Preresolved.decl" :: rest => match parseName?Aux n rest with | some (nm, rest1) => match parseStringList?Aux n rest1 with | some (fields, Token.rparen :: rest2) => some (Lean.Syntax.Preresolved.decl nm fields, rest2) | _ => none | _ => none | _, _ => none /-- Parse a `List Lean.Syntax.Preresolved`. -/ def parsePreresolvedList?Aux : Nat → List Token → Option (List Lean.Syntax.Preresolved × List Token) | 0, _ => none | _+1, Token.ident "List.nil" :: rest => some ([], rest) | n+1, Token.lparen :: Token.ident "List.cons" :: rest => match parsePreresolved?Aux n rest with | some (head, rest1) => match parsePreresolvedList?Aux n rest1 with | some (tail, Token.rparen :: rest2) => some (head :: tail, rest2) | _ => none | _ => none | _, _ => none /-- Joint parser for `Lean.Syntax` / `List Lean.Syntax`. Mirrors the renderer's `Sum` carrier idiom (`syntaxRenderAux`) so the parser matches the encoding's recursive structure exactly. The carrier `Lean.Syntax ⊕ List Lean.Syntax` is used at the *type* level to select the parser arm; the result is the parsed value plus the unconsumed token tail. Fuel-bounded for kernel termination. -/ def parseSyntaxOrList?Aux : Nat → Bool → List Token → Option ((Lean.Syntax ⊕ List Lean.Syntax) × List Token) | 0, _, _ => none -- Single-Syntax mode, `true` | _+1, true, Token.ident "Lean.Syntax.missing" :: rest => some (.inl Lean.Syntax.missing, rest) | n+1, true, Token.lparen :: Token.ident "Lean.Syntax.atom" :: rest => match parseSourceInfo?Aux n rest with | some (info, Token.strLit val :: Token.rparen :: rest') => some (.inl (Lean.Syntax.atom info val), rest') | _ => none | n+1, true, Token.lparen :: Token.ident "Lean.Syntax.ident" :: rest => match parseSourceInfo?Aux n rest with | some (info, rest1) => match parseSubstringRaw?Aux n rest1 with | some (rawVal, rest2) => match parseName?Aux n rest2 with | some (val, rest3) => match parsePreresolvedList?Aux n rest3 with | some (preresolved, Token.rparen :: rest4) => some (.inl (Lean.Syntax.ident info rawVal val preresolved), rest4) | _ => none | _ => none | _ => none | _ => none | n+1, true, Token.lparen :: Token.ident "Lean.Syntax.node" :: rest => match parseSourceInfo?Aux n rest with | some (info, rest1) => match parseName?Aux n rest1 with | some (kind, rest2) => match parseSyntaxOrList?Aux n false rest2 with | some (.inr args, Token.rparen :: rest3) => some (.inl (Lean.Syntax.node info kind args.toArray), rest3) | _ => none | _ => none | _ => none -- List-Syntax mode, `false` | _+1, false, Token.ident "List.nil" :: rest => some (.inr [], rest) | n+1, false, Token.lparen :: Token.ident "List.cons" :: rest => match parseSyntaxOrList?Aux n true rest with | some (.inl head, rest1) => match parseSyntaxOrList?Aux n false rest1 with | some (.inr tail, Token.rparen :: rest2) => some (.inr (head :: tail), rest2) | _ => none | _ => none | _, _, _ => none /-- Parse a `Lean.Syntax`. Thin projection over `parseSyntaxOrList?Aux` in single-Syntax mode (`true`). -/ def parseSyntax?Aux : Nat → List Token → Option (Lean.Syntax × List Token) | n, tokens => match parseSyntaxOrList?Aux n true tokens with | some (.inl s, rest) => some (s, rest) | _ => none /-- Parse a `List Lean.Syntax`. Thin projection over `parseSyntaxOrList?Aux` in list-mode (`false`). -/ def parseSyntaxList?Aux : Nat → List Token → Option (List Lean.Syntax × List Token) | n, tokens => match parseSyntaxOrList?Aux n false tokens with | some (.inr l, rest) => some (l, rest) | _ => none /-- Parse a `MetaArtifact`. -/ def parseArtifact?Aux : Nat → List Token → Option (MetaArtifact × List Token) | 0, _ => none | _+1, Token.ident "Infoductor.MetaArtifact.empty" :: rest => some (MetaArtifact.empty, rest) | _+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.source" :: rest => match rest with | Token.strLit s :: Token.rparen :: rest' => some (MetaArtifact.source s, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.refTo" :: rest => match parseName?Aux n rest with | some (nm, Token.rparen :: rest') => some (MetaArtifact.refTo nm, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.cterm" :: rest => match parseMetaCTerm?Aux n rest with | some (m, Token.rparen :: rest') => some (MetaArtifact.cterm m, rest') | _ => none | n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.declAt" :: rest => match parseSyntax?Aux n rest with | some (s, Token.rparen :: rest') => some (MetaArtifact.declAt s, rest') | _ => none | _, _ => none -- ── Top-level wrappers ───────────────────────────────────────────────────── -- Fuel = `tokens.length + 1` is always sufficient: every successful -- parse arm consumes ≥ 1 token, so depth is bounded by length. def parseName? (tokens : List Token) : Option (Lean.Name × List Token) := parseName?Aux (tokens.length + 1) tokens def parseClassifier? (tokens : List Token) : Option (MetaClassifier × List Token) := parseClassifier?Aux (tokens.length + 1) tokens def parseMetaCTerm? (tokens : List Token) : Option (MetaCTerm × List Token) := parseMetaCTerm?Aux (tokens.length + 1) tokens def parseArtifact? (tokens : List Token) : Option (MetaArtifact × List Token) := parseArtifact?Aux (tokens.length + 1) tokens def MetaCTerm.fromLeanSource? (s : String) : Option MetaCTerm := match parseMetaCTerm? (tokenizeStr s) with | some (t, []) => some t | _ => none def MetaClassifier.fromLeanSource? (s : String) : Option MetaClassifier := match parseClassifier? (tokenizeStr s) with | some (φ, []) => some φ | _ => none def MetaArtifact.fromLeanSource? (s : String) : Option MetaArtifact := match parseArtifact? (tokenizeStr s) with | some (a, []) => some a | _ => none -- ── Canonical token forms — direct meta-mirror → List Token ──────────────── -- Each function mirrors the corresponding `toLeanSource` renderer -- but produces tokens directly, bypassing the String layer. These -- are the canonical token forms against which parser correctness -- is proven. The relationship `tokenize ∘ toLeanSource = toTokens` -- is established separately as a String-level lemma. def nameToTokens : Lean.Name → List Token | .anonymous => [Token.ident "Lean.Name.anonymous"] | .str p s => [Token.lparen, Token.ident "Lean.Name.str"] ++ nameToTokens p ++ [Token.strLit s, Token.rparen] | .num p k => [Token.lparen, Token.ident "Lean.Name.num"] ++ nameToTokens p ++ [Token.numLit k, Token.rparen] def MetaClassifier.toTokens : MetaClassifier → List Token | .always => [Token.ident "Infoductor.MetaClassifier.always"] | .never => [Token.ident "Infoductor.MetaClassifier.never"] | .atDecl n => [Token.lparen, Token.ident "Infoductor.MetaClassifier.atDecl"] ++ nameToTokens n ++ [Token.rparen] | .inFile s => [Token.lparen, Token.ident "Infoductor.MetaClassifier.inFile", Token.strLit s, Token.rparen] | .underAttribute n => [Token.lparen, Token.ident "Infoductor.MetaClassifier.underAttribute"] ++ nameToTokens n ++ [Token.rparen] | .dependencyOf n => [Token.lparen, Token.ident "Infoductor.MetaClassifier.dependencyOf"] ++ nameToTokens n ++ [Token.rparen] | .inNamespace n => [Token.lparen, Token.ident "Infoductor.MetaClassifier.inNamespace"] ++ nameToTokens n ++ [Token.rparen] | .meet a b => [Token.lparen, Token.ident "Infoductor.MetaClassifier.meet"] ++ toTokens a ++ toTokens b ++ [Token.rparen] | .join a b => [Token.lparen, Token.ident "Infoductor.MetaClassifier.join"] ++ toTokens a ++ toTokens b ++ [Token.rparen] def MetaCTerm.toTokens : MetaCTerm → List Token | .ident n => [Token.lparen, Token.ident "Infoductor.MetaCTerm.ident"] ++ nameToTokens n ++ [Token.rparen] | .sym s => [Token.lparen, Token.ident "Infoductor.MetaCTerm.sym", Token.strLit s, Token.rparen] | .app f a => [Token.lparen, Token.ident "Infoductor.MetaCTerm.app"] ++ toTokens f ++ toTokens a ++ [Token.rparen] | .lam x t => [Token.lparen, Token.ident "Infoductor.MetaCTerm.lam", Token.strLit x] ++ toTokens t ++ [Token.rparen] | .plam i t => [Token.lparen, Token.ident "Infoductor.MetaCTerm.plam", Token.strLit i] ++ toTokens t ++ [Token.rparen] | .comp s A φ u t => [Token.lparen, Token.ident "Infoductor.MetaCTerm.comp", Token.strLit s] ++ toTokens A ++ φ.toTokens ++ toTokens u ++ toTokens t ++ [Token.rparen] | .transp s A φ t => [Token.lparen, Token.ident "Infoductor.MetaCTerm.transp", Token.strLit s] ++ toTokens A ++ φ.toTokens ++ toTokens t ++ [Token.rparen] | .empty => [Token.ident "Infoductor.MetaCTerm.empty"] /-- Canonical token form for `String.Pos.Raw`. -/ def stringPosRawToTokens (p : String.Pos.Raw) : List Token := [Token.lparen, Token.ident "String.Pos.Raw.mk", Token.numLit p.byteIdx, Token.rparen] /-- Canonical token form for `Substring.Raw`. -/ def substringRawToTokens (s : Substring.Raw) : List Token := [Token.lparen, Token.ident "Substring.Raw.mk", Token.strLit s.str] ++ stringPosRawToTokens s.startPos ++ stringPosRawToTokens s.stopPos ++ [Token.rparen] /-- Canonical token form for `Bool`. -/ def boolToTokens : Bool → List Token | true => [Token.ident "true"] | false => [Token.ident "false"] /-- Canonical token form for `Lean.SourceInfo`. -/ def sourceInfoToTokens : Lean.SourceInfo → List Token | .original lead pos trail endPos => [Token.lparen, Token.ident "Lean.SourceInfo.original"] ++ substringRawToTokens lead ++ stringPosRawToTokens pos ++ substringRawToTokens trail ++ stringPosRawToTokens endPos ++ [Token.rparen] | .synthetic pos endPos canonical => [Token.lparen, Token.ident "Lean.SourceInfo.synthetic"] ++ stringPosRawToTokens pos ++ stringPosRawToTokens endPos ++ boolToTokens canonical ++ [Token.rparen] | .none => [Token.ident "Lean.SourceInfo.none"] /-- Canonical token form for `List String` (used by `Preresolved.decl`). -/ def stringListToTokens : List String → List Token | [] => [Token.ident "List.nil"] | s :: rest => [Token.lparen, Token.ident "List.cons", Token.strLit s] ++ stringListToTokens rest ++ [Token.rparen] /-- Canonical token form for `Lean.Syntax.Preresolved`. -/ def preresolvedToTokens : Lean.Syntax.Preresolved → List Token | .namespace ns => [Token.lparen, Token.ident "Lean.Syntax.Preresolved.namespace"] ++ nameToTokens ns ++ [Token.rparen] | .decl n fields => [Token.lparen, Token.ident "Lean.Syntax.Preresolved.decl"] ++ nameToTokens n ++ stringListToTokens fields ++ [Token.rparen] /-- Canonical token form for `List Lean.Syntax.Preresolved`. -/ def preresolvedListToTokens : List Lean.Syntax.Preresolved → List Token | [] => [Token.ident "List.nil"] | p :: rest => [Token.lparen, Token.ident "List.cons"] ++ preresolvedToTokens p ++ preresolvedListToTokens rest ++ [Token.rparen] -- Canonical token form for `Lean.Syntax`. Defined via `mutual` so -- recursion is structural on the syntax tree (and on `List Lean.Syntax` -- through `Array.toList`); no fuel parameter needed. Mirrors -- `syntaxToLeanSource` from `Foundation.Meta`. mutual /-- Token form of a `Lean.Syntax`. -/ def syntaxToTokens : Lean.Syntax → List Token | .missing => [Token.ident "Lean.Syntax.missing"] | .atom info val => [Token.lparen, Token.ident "Lean.Syntax.atom"] ++ sourceInfoToTokens info ++ [Token.strLit val, Token.rparen] | .ident info rawVal val preresolved => [Token.lparen, Token.ident "Lean.Syntax.ident"] ++ sourceInfoToTokens info ++ substringRawToTokens rawVal ++ nameToTokens val ++ preresolvedListToTokens preresolved ++ [Token.rparen] | .node info kind args => [Token.lparen, Token.ident "Lean.Syntax.node"] ++ sourceInfoToTokens info ++ nameToTokens kind ++ syntaxListToTokens args.toList ++ [Token.rparen] /-- Token form of a `List Lean.Syntax` — encodes as a `List.cons` / `List.nil` chain. -/ def syntaxListToTokens : List Lean.Syntax → List Token | [] => [Token.ident "List.nil"] | s :: rest => [Token.lparen, Token.ident "List.cons"] ++ syntaxToTokens s ++ syntaxListToTokens rest ++ [Token.rparen] end def MetaArtifact.toTokens : MetaArtifact → List Token | .source s => [Token.lparen, Token.ident "Infoductor.MetaArtifact.source", Token.strLit s, Token.rparen] | .declAt s => [Token.lparen, Token.ident "Infoductor.MetaArtifact.declAt"] ++ syntaxToTokens s ++ [Token.rparen] | .cterm m => [Token.lparen, Token.ident "Infoductor.MetaArtifact.cterm"] ++ m.toTokens ++ [Token.rparen] | .refTo n => [Token.lparen, Token.ident "Infoductor.MetaArtifact.refTo"] ++ nameToTokens n ++ [Token.rparen] | .empty => [Token.ident "Infoductor.MetaArtifact.empty"] -- ── Size measures (for fuel bookkeeping in proofs) ───────────────────────── def nameDepth : Lean.Name → Nat | .anonymous => 1 | .str p _ => nameDepth p + 1 | .num p _ => nameDepth p + 1 def classifierDepth : MetaClassifier → Nat | .always | .never => 1 | .atDecl n => nameDepth n + 1 | .inFile _ => 2 | .underAttribute n => nameDepth n + 1 | .dependencyOf n => nameDepth n + 1 | .inNamespace n => nameDepth n + 1 | .meet a b => max (classifierDepth a) (classifierDepth b) + 1 | .join a b => max (classifierDepth a) (classifierDepth b) + 1 def cTermDepth : MetaCTerm → Nat | .ident n => nameDepth n + 1 | .sym _ => 2 | .empty => 1 | .lam _ t | .plam _ t => cTermDepth t + 1 | .app f a => max (cTermDepth f) (cTermDepth a) + 1 | .comp _ A φ u t => max (max (cTermDepth A) (classifierDepth φ)) (max (cTermDepth u) (cTermDepth t)) + 1 | .transp _ A φ t => max (max (cTermDepth A) (classifierDepth φ)) (cTermDepth t) + 1 /-- Depth measure for `String.Pos.Raw` — atomic 4-token form. -/ def stringPosRawDepth (_ : String.Pos.Raw) : Nat := 1 /-- Depth measure for `Substring.Raw`. -/ def substringRawDepth (_ : Substring.Raw) : Nat := 2 /-- Depth measure for `Lean.SourceInfo`. -/ def sourceInfoDepth : Lean.SourceInfo → Nat | .original _ _ _ _ => 3 | .synthetic _ _ _ => 3 | .none => 1 /-- Depth measure for `List String`. -/ def stringListDepth : List String → Nat | [] => 1 | _ :: rest => stringListDepth rest + 1 /-- Depth measure for `Lean.Syntax.Preresolved`. -/ def preresolvedDepth : Lean.Syntax.Preresolved → Nat | .namespace n => nameDepth n + 1 | .decl n fields => max (nameDepth n) (stringListDepth fields) + 1 /-- Depth measure for `List Lean.Syntax.Preresolved`. -/ def preresolvedListDepth : List Lean.Syntax.Preresolved → Nat | [] => 1 | p :: rest => max (preresolvedDepth p) (preresolvedListDepth rest) + 1 -- Structural depth measure for `Lean.Syntax`. Defined via `mutual` -- so recursion is structural on the syntax tree. No fuel parameter -- needed — depth is a true structural function of the input. Includes -- the depths of *all* sub-payloads (sourceInfo, substring, name, -- preresolved-list) so the parser-correctness proof's bookkeeping is -- direct. mutual /-- Structural depth on `Lean.Syntax`. -/ def syntaxParseDepth : Lean.Syntax → Nat | .missing => 1 | .atom info _ => sourceInfoDepth info + 1 | .ident info rawVal n preresolved => max (sourceInfoDepth info) (max (substringRawDepth rawVal) (max (nameDepth n) (preresolvedListDepth preresolved))) + 1 | .node info kind args => max (sourceInfoDepth info) (max (nameDepth kind) (syntaxListParseDepth args.toList)) + 1 /-- Structural depth on `List Lean.Syntax`. -/ def syntaxListParseDepth : List Lean.Syntax → Nat | [] => 1 | s :: rest => max (syntaxParseDepth s) (syntaxListParseDepth rest) + 1 end def artifactDepth : MetaArtifact → Nat | .source _ => 2 | .declAt s => syntaxParseDepth s + 1 | .cterm m => cTermDepth m + 1 | .refTo n => nameDepth n + 1 | .empty => 1 /-- All `MetaArtifact` arms are now supported by the round-trip: `.declAt` payloads are encoded constructively via the `syntaxToTokens` / `parseSyntax?Aux` pair. Retained as a Boolean predicate (constant `true`) for backward compatibility with downstream code that depended on its existence. -/ def MetaArtifact.supported : MetaArtifact → Bool | _ => true -- ── Parser correctness on toTokens (Phase 2) ────────────────────────────── -- Universal theorems witnessing that each parser, when fed the -- canonical token form of a value plus arbitrary trailing tokens, -- reconstructs the value exactly and leaves the trailing tokens -- untouched. Provided fuel is at least the depth of the value. -- Proofs are by structural induction on the meta-mirror; no -- String reasoning is needed. theorem parseName?Aux_correct : ∀ (n : Lean.Name) (rest : List Token) (fuel : Nat), fuel ≥ nameDepth n → parseName?Aux fuel (nameToTokens n ++ rest) = some (n, rest) | .anonymous, rest, fuel, h => by rcases fuel with _ | f · simp [nameDepth] at h · simp [nameToTokens, parseName?Aux] | .str p s, rest, fuel, h => by rcases fuel with _ | f · simp [nameDepth] at h · simp only [nameToTokens, List.append_assoc, List.cons_append, List.nil_append, parseName?Aux] have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega have ih := parseName?Aux_correct p (Token.strLit s :: Token.rparen :: rest) f hp rw [ih] | .num p k, rest, fuel, h => by rcases fuel with _ | f · simp [nameDepth] at h · simp only [nameToTokens, List.append_assoc, List.cons_append, List.nil_append, parseName?Aux] have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega have ih := parseName?Aux_correct p (Token.numLit k :: Token.rparen :: rest) f hp rw [ih] theorem parseClassifier?Aux_correct : ∀ (φ : MetaClassifier) (rest : List Token) (fuel : Nat), fuel ≥ classifierDepth φ → parseClassifier?Aux fuel (φ.toTokens ++ rest) = some (φ, rest) | .always, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp [MetaClassifier.toTokens, parseClassifier?Aux] | .never, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp [MetaClassifier.toTokens, parseClassifier?Aux] | .atDecl n, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseClassifier?Aux] have hn : f ≥ nameDepth n := by simp [classifierDepth] at h; omega rw [parseName?Aux_correct n (Token.rparen :: rest) f hn] | .inFile s, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp [MetaClassifier.toTokens, parseClassifier?Aux] | .underAttribute n, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseClassifier?Aux] have hn : f ≥ nameDepth n := by simp [classifierDepth] at h; omega rw [parseName?Aux_correct n (Token.rparen :: rest) f hn] | .dependencyOf n, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseClassifier?Aux] have hn : f ≥ nameDepth n := by simp [classifierDepth] at h; omega rw [parseName?Aux_correct n (Token.rparen :: rest) f hn] | .inNamespace n, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseClassifier?Aux] have hn : f ≥ nameDepth n := by simp [classifierDepth] at h; omega rw [parseName?Aux_correct n (Token.rparen :: rest) f hn] | .meet a b, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseClassifier?Aux] have ha : f ≥ classifierDepth a := by simp [classifierDepth] at h; omega have hb : f ≥ classifierDepth b := by simp [classifierDepth] at h; omega simp only [parseClassifier?Aux_correct a _ f ha, parseClassifier?Aux_correct b _ f hb] | .join a b, rest, fuel, h => by rcases fuel with _ | f · simp [classifierDepth] at h · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseClassifier?Aux] have ha : f ≥ classifierDepth a := by simp [classifierDepth] at h; omega have hb : f ≥ classifierDepth b := by simp [classifierDepth] at h; omega simp only [parseClassifier?Aux_correct a _ f ha, parseClassifier?Aux_correct b _ f hb] theorem parseMetaCTerm?Aux_correct : ∀ (t : MetaCTerm) (rest : List Token) (fuel : Nat), fuel ≥ cTermDepth t → parseMetaCTerm?Aux fuel (t.toTokens ++ rest) = some (t, rest) | .empty, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp [MetaCTerm.toTokens, parseMetaCTerm?Aux] | .ident n, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseMetaCTerm?Aux] have hn : f ≥ nameDepth n := by simp [cTermDepth] at h; omega rw [parseName?Aux_correct n (Token.rparen :: rest) f hn] | .sym s, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp [MetaCTerm.toTokens, parseMetaCTerm?Aux] | .app fa aa, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseMetaCTerm?Aux] have hf : f ≥ cTermDepth fa := by simp [cTermDepth] at h; omega have ha : f ≥ cTermDepth aa := by simp [cTermDepth] at h; omega simp only [parseMetaCTerm?Aux_correct fa _ f hf, parseMetaCTerm?Aux_correct aa _ f ha] | .lam x t, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseMetaCTerm?Aux] have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega simp only [parseMetaCTerm?Aux_correct t _ f ht] | .plam i t, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseMetaCTerm?Aux] have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega simp only [parseMetaCTerm?Aux_correct t _ f ht] | .comp s A φ u t, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseMetaCTerm?Aux] have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega have hu : f ≥ cTermDepth u := by simp [cTermDepth] at h; omega have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega simp only [parseMetaCTerm?Aux_correct A _ f hA, parseClassifier?Aux_correct φ _ f hφ, parseMetaCTerm?Aux_correct u _ f hu, parseMetaCTerm?Aux_correct t _ f ht] | .transp s A φ t, rest, fuel, h => by rcases fuel with _ | f · simp [cTermDepth] at h · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseMetaCTerm?Aux] have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega simp only [parseMetaCTerm?Aux_correct A _ f hA, parseClassifier?Aux_correct φ _ f hφ, parseMetaCTerm?Aux_correct t _ f ht] -- ── Correctness for Lean.Syntax mirror parsers ───────────────────────────── -- Each parser proves: feeding the canonical token form of a value plus -- arbitrary trailing tokens reconstructs the value, leaving the trail -- untouched, provided fuel ≥ value's depth. theorem parseStringPosRaw?Aux_correct : ∀ (p : String.Pos.Raw) (rest : List Token) (fuel : Nat), fuel ≥ stringPosRawDepth p → parseStringPosRaw?Aux fuel (stringPosRawToTokens p ++ rest) = some (p, rest) := by intro p rest fuel h rcases fuel with _ | f · simp [stringPosRawDepth] at h · simp [stringPosRawToTokens, parseStringPosRaw?Aux] theorem parseSubstringRaw?Aux_correct : ∀ (s : Substring.Raw) (rest : List Token) (fuel : Nat), fuel ≥ substringRawDepth s → parseSubstringRaw?Aux fuel (substringRawToTokens s ++ rest) = some (s, rest) := by intro s rest fuel h rcases fuel with _ | f · simp [substringRawDepth] at h · simp only [substringRawToTokens, List.append_assoc, List.cons_append, List.nil_append, parseSubstringRaw?Aux] have h1 : f ≥ stringPosRawDepth s.startPos := by simp [substringRawDepth, stringPosRawDepth] at h ⊢; omega have h2 : f ≥ stringPosRawDepth s.stopPos := by simp [substringRawDepth, stringPosRawDepth] at h ⊢; omega simp only [parseStringPosRaw?Aux_correct s.startPos _ f h1, parseStringPosRaw?Aux_correct s.stopPos _ f h2] theorem parseBool?Aux_correct : ∀ (b : Bool) (rest : List Token), parseBool?Aux (boolToTokens b ++ rest) = some (b, rest) := by intro b rest cases b <;> simp [boolToTokens, parseBool?Aux] theorem parseSourceInfo?Aux_correct : ∀ (info : Lean.SourceInfo) (rest : List Token) (fuel : Nat), fuel ≥ sourceInfoDepth info → parseSourceInfo?Aux fuel (sourceInfoToTokens info ++ rest) = some (info, rest) := by intro info rest fuel h cases info with | none => rcases fuel with _ | f · simp [sourceInfoDepth] at h · simp [sourceInfoToTokens, parseSourceInfo?Aux] | original lead pos trail endPos => rcases fuel with _ | f · simp [sourceInfoDepth] at h · simp only [sourceInfoToTokens, List.append_assoc, List.cons_append, List.nil_append, parseSourceInfo?Aux] have hlead : f ≥ substringRawDepth lead := by simp [sourceInfoDepth, substringRawDepth] at h ⊢; omega have hpos : f ≥ stringPosRawDepth pos := by simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega have htrail : f ≥ substringRawDepth trail := by simp [sourceInfoDepth, substringRawDepth] at h ⊢; omega have hend : f ≥ stringPosRawDepth endPos := by simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega simp only [parseSubstringRaw?Aux_correct lead _ f hlead, parseStringPosRaw?Aux_correct pos _ f hpos, parseSubstringRaw?Aux_correct trail _ f htrail, parseStringPosRaw?Aux_correct endPos _ f hend] | synthetic pos endPos canonical => rcases fuel with _ | f · simp [sourceInfoDepth] at h · simp only [sourceInfoToTokens, List.append_assoc, List.cons_append, List.nil_append, parseSourceInfo?Aux] have hpos : f ≥ stringPosRawDepth pos := by simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega have hend : f ≥ stringPosRawDepth endPos := by simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega simp only [parseStringPosRaw?Aux_correct pos _ f hpos, parseStringPosRaw?Aux_correct endPos _ f hend, parseBool?Aux_correct canonical (Token.rparen :: rest)] theorem parseStringList?Aux_correct : ∀ (l : List String) (rest : List Token) (fuel : Nat), fuel ≥ stringListDepth l → parseStringList?Aux fuel (stringListToTokens l ++ rest) = some (l, rest) | [], rest, fuel, h => by rcases fuel with _ | f · simp [stringListDepth] at h · simp [stringListToTokens, parseStringList?Aux] | s :: tail, rest, fuel, h => by rcases fuel with _ | f · simp [stringListDepth] at h · simp only [stringListToTokens, List.append_assoc, List.cons_append, List.nil_append, parseStringList?Aux] have htail : f ≥ stringListDepth tail := by simp [stringListDepth] at h; omega simp only [parseStringList?Aux_correct tail _ f htail] theorem parsePreresolved?Aux_correct : ∀ (p : Lean.Syntax.Preresolved) (rest : List Token) (fuel : Nat), fuel ≥ preresolvedDepth p → parsePreresolved?Aux fuel (preresolvedToTokens p ++ rest) = some (p, rest) | .namespace ns, rest, fuel, h => by rcases fuel with _ | f · simp [preresolvedDepth] at h · simp only [preresolvedToTokens, List.append_assoc, List.cons_append, List.nil_append, parsePreresolved?Aux] have hn : f ≥ nameDepth ns := by simp [preresolvedDepth] at h; omega simp only [parseName?Aux_correct ns _ f hn] | .decl n fields, rest, fuel, h => by rcases fuel with _ | f · simp [preresolvedDepth] at h · simp only [preresolvedToTokens, List.append_assoc, List.cons_append, List.nil_append, parsePreresolved?Aux] have hn : f ≥ nameDepth n := by simp [preresolvedDepth] at h; omega have hf : f ≥ stringListDepth fields := by simp [preresolvedDepth] at h; omega simp only [parseName?Aux_correct n _ f hn, parseStringList?Aux_correct fields _ f hf] theorem parsePreresolvedList?Aux_correct : ∀ (l : List Lean.Syntax.Preresolved) (rest : List Token) (fuel : Nat), fuel ≥ preresolvedListDepth l → parsePreresolvedList?Aux fuel (preresolvedListToTokens l ++ rest) = some (l, rest) | [], rest, fuel, h => by rcases fuel with _ | f · simp [preresolvedListDepth] at h · simp [preresolvedListToTokens, parsePreresolvedList?Aux] | p :: tail, rest, fuel, h => by rcases fuel with _ | f · simp [preresolvedListDepth] at h · simp only [preresolvedListToTokens, List.append_assoc, List.cons_append, List.nil_append, parsePreresolvedList?Aux] have hp : f ≥ preresolvedDepth p := by simp [preresolvedListDepth] at h; omega have ht : f ≥ preresolvedListDepth tail := by simp [preresolvedListDepth] at h; omega simp only [parsePreresolved?Aux_correct p _ f hp, parsePreresolvedList?Aux_correct tail _ f ht] /-- Round-trip for the joint syntax / list-syntax parser. The encoder is structural (no fuel) — the parser still needs fuel because it consumes a token list of unknown length; fuel ≥ structural depth of the value being parsed. -/ theorem parseSyntaxOrList?Aux_correct : ∀ (fuel : Nat) (x : Lean.Syntax ⊕ List Lean.Syntax) (rest : List Token), fuel ≥ Sum.elim syntaxParseDepth syntaxListParseDepth x → parseSyntaxOrList?Aux fuel (Sum.elim (fun _ => true) (fun _ => false) x) (Sum.elim syntaxToTokens syntaxListToTokens x ++ rest) = some (x, rest) | 0, x, _, h => by cases x with | inl s => simp only [Sum.elim_inl] at h cases s <;> simp [syntaxParseDepth] at h | inr l => simp only [Sum.elim_inr] at h cases l <;> simp [syntaxListParseDepth] at h | f + 1, .inl .missing, rest, _ => by simp [syntaxToTokens, parseSyntaxOrList?Aux] | f + 1, .inl (.atom info val), rest, h => by simp only [Sum.elim_inl, syntaxToTokens, List.append_assoc, List.cons_append, List.nil_append, parseSyntaxOrList?Aux] have hi : f ≥ sourceInfoDepth info := by simp only [Sum.elim_inl, syntaxParseDepth] at h; omega simp only [parseSourceInfo?Aux_correct info _ f hi] | f + 1, .inl (.ident info rawVal val preresolved), rest, h => by simp only [Sum.elim_inl, syntaxToTokens, List.append_assoc, List.cons_append, List.nil_append, parseSyntaxOrList?Aux] simp only [Sum.elim_inl, syntaxParseDepth] at h -- Manually decompose the four-arm max: f + 1 ≥ max info (max raw (max name pre)) + 1. have hi : f ≥ sourceInfoDepth info := by have := Nat.le_max_left (sourceInfoDepth info) (max (substringRawDepth rawVal) (max (nameDepth val) (preresolvedListDepth preresolved))) omega have hr : f ≥ substringRawDepth rawVal := by have h1 := Nat.le_max_right (sourceInfoDepth info) (max (substringRawDepth rawVal) (max (nameDepth val) (preresolvedListDepth preresolved))) have h2 := Nat.le_max_left (substringRawDepth rawVal) (max (nameDepth val) (preresolvedListDepth preresolved)) omega have hn : f ≥ nameDepth val := by have h1 := Nat.le_max_right (sourceInfoDepth info) (max (substringRawDepth rawVal) (max (nameDepth val) (preresolvedListDepth preresolved))) have h2 := Nat.le_max_right (substringRawDepth rawVal) (max (nameDepth val) (preresolvedListDepth preresolved)) have h3 := Nat.le_max_left (nameDepth val) (preresolvedListDepth preresolved) omega have hp : f ≥ preresolvedListDepth preresolved := by have h1 := Nat.le_max_right (sourceInfoDepth info) (max (substringRawDepth rawVal) (max (nameDepth val) (preresolvedListDepth preresolved))) have h2 := Nat.le_max_right (substringRawDepth rawVal) (max (nameDepth val) (preresolvedListDepth preresolved)) have h3 := Nat.le_max_right (nameDepth val) (preresolvedListDepth preresolved) omega simp only [parseSourceInfo?Aux_correct info _ f hi, parseSubstringRaw?Aux_correct rawVal _ f hr, parseName?Aux_correct val _ f hn, parsePreresolvedList?Aux_correct preresolved _ f hp] | f + 1, .inl (.node info kind args), rest, h => by simp only [Sum.elim_inl, syntaxToTokens, List.append_assoc, List.cons_append, List.nil_append, parseSyntaxOrList?Aux] simp only [Sum.elim_inl, syntaxParseDepth] at h have hi : f ≥ sourceInfoDepth info := by have := Nat.le_max_left (sourceInfoDepth info) (max (nameDepth kind) (syntaxListParseDepth args.toList)) omega have hk : f ≥ nameDepth kind := by have h1 := Nat.le_max_right (sourceInfoDepth info) (max (nameDepth kind) (syntaxListParseDepth args.toList)) have h2 := Nat.le_max_left (nameDepth kind) (syntaxListParseDepth args.toList) omega have hargs_d : f ≥ syntaxListParseDepth args.toList := by have h1 := Nat.le_max_right (sourceInfoDepth info) (max (nameDepth kind) (syntaxListParseDepth args.toList)) have h2 := Nat.le_max_right (nameDepth kind) (syntaxListParseDepth args.toList) omega have ih := parseSyntaxOrList?Aux_correct f (.inr args.toList) (Token.rparen :: rest) (by simp only [Sum.elim_inr]; exact hargs_d) simp only [Sum.elim_inr] at ih simp only [parseSourceInfo?Aux_correct info _ f hi, parseName?Aux_correct kind _ f hk, ih, Array.toArray_toList] | f + 1, .inr [], rest, _ => by simp [syntaxListToTokens, parseSyntaxOrList?Aux] | f + 1, .inr (s :: tail), rest, h => by simp only [Sum.elim_inr, syntaxListToTokens, List.append_assoc, List.cons_append, List.nil_append, parseSyntaxOrList?Aux] simp only [Sum.elim_inr, syntaxListParseDepth] at h have hs_d : f ≥ syntaxParseDepth s := by have := Nat.le_max_left (syntaxParseDepth s) (syntaxListParseDepth tail) omega have ht_d : f ≥ syntaxListParseDepth tail := by have := Nat.le_max_right (syntaxParseDepth s) (syntaxListParseDepth tail) omega have ihs := parseSyntaxOrList?Aux_correct f (.inl s) (syntaxListToTokens tail ++ Token.rparen :: rest) (by simp only [Sum.elim_inl]; exact hs_d) have iht := parseSyntaxOrList?Aux_correct f (.inr tail) (Token.rparen :: rest) (by simp only [Sum.elim_inr]; exact ht_d) simp only [Sum.elim_inl] at ihs simp only [Sum.elim_inr] at iht rw [ihs] simp only [iht] /-- Specialisation: round-trip for `parseSyntax?Aux` projection. -/ theorem parseSyntax?Aux_correct : ∀ (s : Lean.Syntax) (rest : List Token) (fuel : Nat), fuel ≥ syntaxParseDepth s → parseSyntax?Aux fuel (syntaxToTokens s ++ rest) = some (s, rest) := by intro s rest fuel h show (match parseSyntaxOrList?Aux fuel true (syntaxToTokens s ++ rest) with | some (.inl s, r) => some (s, r) | _ => none) = some (s, rest) have := parseSyntaxOrList?Aux_correct fuel (.inl s) rest (by simp only [Sum.elim_inl]; exact h) simp only [Sum.elim_inl] at this rw [this] theorem parseArtifact?Aux_correct : ∀ (a : MetaArtifact) (rest : List Token) (fuel : Nat), a.supported = true → fuel ≥ artifactDepth a → parseArtifact?Aux fuel (a.toTokens ++ rest) = some (a, rest) | .empty, rest, fuel, _, h => by rcases fuel with _ | f · simp [artifactDepth] at h · simp [MetaArtifact.toTokens, parseArtifact?Aux] | .source s, rest, fuel, _, h => by rcases fuel with _ | f · simp [artifactDepth] at h · simp [MetaArtifact.toTokens, parseArtifact?Aux] | .refTo n, rest, fuel, _, h => by rcases fuel with _ | f · simp [artifactDepth] at h · simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseArtifact?Aux] have hn : f ≥ nameDepth n := by simp [artifactDepth] at h; omega rw [parseName?Aux_correct n (Token.rparen :: rest) f hn] | .cterm m, rest, fuel, _, h => by rcases fuel with _ | f · simp [artifactDepth] at h · simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseArtifact?Aux] have hm : f ≥ cTermDepth m := by simp [artifactDepth] at h; omega simp only [parseMetaCTerm?Aux_correct m _ f hm] | .declAt s, rest, fuel, _, h => by rcases fuel with _ | f · simp [artifactDepth] at h · simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append, List.nil_append, parseArtifact?Aux] -- The inner parser is now called at `f` fuel (we'll fix the -- parser body to use `f` rather than `syntaxFuelCap` once below). have hs : f ≥ syntaxParseDepth s := by simp [artifactDepth] at h; omega rw [parseSyntax?Aux_correct s (Token.rparen :: rest) f hs] -- ── Token-level round-trip — the canonical universal theorems ───────────── -- These follow directly from the `parser?Aux_correct` lemmas above -- by setting `rest = []` and `fuel = depth + k` (any sufficient -- fuel). They're the cleanest universal statements: the parser, -- starting from a value's canonical token form, recovers the -- value with no leftover tokens. No String reasoning needed. /-- Top-level token-entry parser. Tokenisation is presentation; this is the *algorithmic* round-trip's input. -/ def MetaCTerm.fromTokens? (tokens : List Token) : Option MetaCTerm := match parseMetaCTerm? tokens with | some (t, []) => some t | _ => none def MetaClassifier.fromTokens? (tokens : List Token) : Option MetaClassifier := match parseClassifier? tokens with | some (φ, []) => some φ | _ => none def Infoductor.nameFromTokens? (tokens : List Token) : Option Lean.Name := match parseName? tokens with | some (n, []) => some n | _ => none def MetaArtifact.fromTokens? (tokens : List Token) : Option MetaArtifact := match parseArtifact? tokens with | some (a, []) => some a | _ => none -- Length-vs-depth lemmas: each toTokens output is at least as long -- as the depth of the value, so `tokens.length + 1` is always -- sufficient fuel. theorem nameToTokens_length_bound (n : Lean.Name) : (nameToTokens n).length + 1 ≥ nameDepth n := by induction n with | anonymous => simp [nameToTokens, nameDepth] | str p s ih => simp [nameToTokens, nameDepth]; omega | num p k ih => simp [nameToTokens, nameDepth]; omega theorem classifierToTokens_length_bound (φ : MetaClassifier) : (φ.toTokens).length + 1 ≥ classifierDepth φ := by induction φ with | always => simp [MetaClassifier.toTokens, classifierDepth] | never => simp [MetaClassifier.toTokens, classifierDepth] | atDecl n => simp [MetaClassifier.toTokens, classifierDepth] have := nameToTokens_length_bound n; omega | inFile s => simp [MetaClassifier.toTokens, classifierDepth] | underAttribute n => simp [MetaClassifier.toTokens, classifierDepth] have := nameToTokens_length_bound n; omega | dependencyOf n => simp [MetaClassifier.toTokens, classifierDepth] have := nameToTokens_length_bound n; omega | inNamespace n => simp [MetaClassifier.toTokens, classifierDepth] have := nameToTokens_length_bound n; omega | meet a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega | join a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega theorem cTermToTokens_length_bound (t : MetaCTerm) : (t.toTokens).length + 1 ≥ cTermDepth t := by induction t with | empty => simp [MetaCTerm.toTokens, cTermDepth] | ident n => simp [MetaCTerm.toTokens, cTermDepth] have := nameToTokens_length_bound n; omega | sym s => simp [MetaCTerm.toTokens, cTermDepth] | app f a ihf iha => simp [MetaCTerm.toTokens, cTermDepth]; omega | lam x t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega | plam i t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega | comp s A φ u t ihA ihu iht => simp [MetaCTerm.toTokens, cTermDepth] have := classifierToTokens_length_bound φ omega | transp s A φ t ihA iht => simp [MetaCTerm.toTokens, cTermDepth] have := classifierToTokens_length_bound φ omega -- Length-vs-depth for the helper-syntax encoders. theorem stringPosRawToTokens_length_bound (p : String.Pos.Raw) : (stringPosRawToTokens p).length + 1 ≥ stringPosRawDepth p := by simp [stringPosRawToTokens, stringPosRawDepth] theorem substringRawToTokens_length_bound (s : Substring.Raw) : (substringRawToTokens s).length + 1 ≥ substringRawDepth s := by simp [substringRawToTokens, substringRawDepth, stringPosRawToTokens] theorem sourceInfoToTokens_length_bound (info : Lean.SourceInfo) : (sourceInfoToTokens info).length + 1 ≥ sourceInfoDepth info := by cases info <;> simp [sourceInfoToTokens, sourceInfoDepth, substringRawToTokens, stringPosRawToTokens, boolToTokens] theorem stringListToTokens_length_bound (l : List String) : (stringListToTokens l).length + 1 ≥ stringListDepth l := by induction l with | nil => simp [stringListToTokens, stringListDepth] | cons s rest ih => simp [stringListToTokens, stringListDepth]; omega theorem preresolvedToTokens_length_bound : ∀ (p : Lean.Syntax.Preresolved), (preresolvedToTokens p).length + 1 ≥ preresolvedDepth p | .namespace ns => by simp [preresolvedToTokens, preresolvedDepth] have := nameToTokens_length_bound ns; omega | .decl n fields => by simp [preresolvedToTokens, preresolvedDepth] have h1 := nameToTokens_length_bound n have h2 := stringListToTokens_length_bound fields omega theorem preresolvedListToTokens_length_bound (l : List Lean.Syntax.Preresolved) : (preresolvedListToTokens l).length + 1 ≥ preresolvedListDepth l := by induction l with | nil => simp [preresolvedListToTokens, preresolvedListDepth] | cons p rest ih => simp [preresolvedListToTokens, preresolvedListDepth] have := preresolvedToTokens_length_bound p omega -- Length-vs-depth for the syntax encoder. Joint mutual recursion -- because the encoder and depth function are themselves mutually -- recursive on `Lean.Syntax` / `List Lean.Syntax`. mutual /-- Length bound on a `Lean.Syntax` encoding. -/ theorem syntaxToTokens_length_bound : ∀ (s : Lean.Syntax), (syntaxToTokens s).length + 1 ≥ syntaxParseDepth s | .missing => by simp [syntaxToTokens, syntaxParseDepth] | .atom info _ => by simp [syntaxToTokens, syntaxParseDepth] have := sourceInfoToTokens_length_bound info omega | .ident info rawVal val preresolved => by simp [syntaxToTokens, syntaxParseDepth] have h1 := sourceInfoToTokens_length_bound info have h2 := substringRawToTokens_length_bound rawVal have h3 := nameToTokens_length_bound val have h4 := preresolvedListToTokens_length_bound preresolved omega | .node info kind args => by simp [syntaxToTokens, syntaxParseDepth] have h1 := sourceInfoToTokens_length_bound info have h2 := nameToTokens_length_bound kind have h3 := syntaxListToTokens_length_bound args.toList omega /-- Length bound on a `List Lean.Syntax` encoding. -/ theorem syntaxListToTokens_length_bound : ∀ (l : List Lean.Syntax), (syntaxListToTokens l).length + 1 ≥ syntaxListParseDepth l | [] => by simp [syntaxListToTokens, syntaxListParseDepth] | s :: rest => by simp [syntaxListToTokens, syntaxListParseDepth] have h1 := syntaxToTokens_length_bound s have h2 := syntaxListToTokens_length_bound rest omega end /-- Every meta-mirror value's canonical token form parses back to the same value. Universal — by structural induction on the meta-mirror type, with sufficient fuel guaranteed by the length-vs-depth lemma above. -/ theorem nameFromTokens?_round_trip (n : Lean.Name) : Infoductor.nameFromTokens? (nameToTokens n) = some n := by unfold Infoductor.nameFromTokens? parseName? have h := nameToTokens_length_bound n have := parseName?Aux_correct n [] ((nameToTokens n).length + 1) h rw [List.append_nil] at this rw [this] theorem classifierFromTokens?_round_trip (φ : MetaClassifier) : MetaClassifier.fromTokens? φ.toTokens = some φ := by unfold MetaClassifier.fromTokens? parseClassifier? have h := classifierToTokens_length_bound φ have := parseClassifier?Aux_correct φ [] ((φ.toTokens).length + 1) h rw [List.append_nil] at this rw [this] theorem cTermFromTokens?_round_trip (t : MetaCTerm) : MetaCTerm.fromTokens? t.toTokens = some t := by unfold MetaCTerm.fromTokens? parseMetaCTerm? have h := cTermToTokens_length_bound t have := parseMetaCTerm?Aux_correct t [] ((t.toTokens).length + 1) h rw [List.append_nil] at this rw [this] theorem artifactFromTokens?_round_trip (a : MetaArtifact) (hsup : a.supported = true) : MetaArtifact.fromTokens? a.toTokens = some a := by unfold MetaArtifact.fromTokens? parseArtifact? -- artifact length bound have hlen : (a.toTokens).length + 1 ≥ artifactDepth a := by cases a with | empty => simp [MetaArtifact.toTokens, artifactDepth] | source s => simp [MetaArtifact.toTokens, artifactDepth] | declAt s => simp [MetaArtifact.toTokens, artifactDepth] have := syntaxToTokens_length_bound s omega | cterm m => simp [MetaArtifact.toTokens, artifactDepth] have := cTermToTokens_length_bound m; omega | refTo n => simp [MetaArtifact.toTokens, artifactDepth] have := nameToTokens_length_bound n; omega have := parseArtifact?Aux_correct a [] ((a.toTokens).length + 1) hsup hlen rw [List.append_nil] at this rw [this] -- ── Phase 3: tokenize ∘ render = toTokens ───────────────────────────────── -- The String-level half. Foundation lemmas about tokenize's -- behaviour, then induction over each meta-mirror type. /-- `tokenize` on `(` :: rest reduces to `lparen :: tokenize rest`. -/ theorem tokenize_lparen_cons (rest : List Char) : tokenize ('(' :: rest) = Token.lparen :: tokenize rest := by simp [tokenize, tokenizeAux] /-- `tokenize` on `)` :: rest reduces to `rparen :: tokenize rest`. -/ theorem tokenize_rparen_cons (rest : List Char) : tokenize (')' :: rest) = Token.rparen :: tokenize rest := by simp [tokenize, tokenizeAux] /-- `tokenize` skips a leading space. -/ theorem tokenize_space_cons (rest : List Char) : tokenize (' ' :: rest) = tokenize rest := by simp [tokenize, tokenizeAux, isWhitespace] -- Atomic-arm Phase 3 witnesses via `decide`. Each closed input -- reduces in the kernel to a concrete token list; we prove -- structural identity between `tokenize (toLeanSource v).toList` -- and `toTokens v` for these atomic shapes. set_option maxRecDepth 2000 in theorem tokenize_render_name_anonymous : tokenize (nameToLeanSource Lean.Name.anonymous).toList = nameToTokens Lean.Name.anonymous := by decide set_option maxRecDepth 2000 in theorem tokenize_render_classifier_always : tokenize (MetaClassifier.toLeanSource MetaClassifier.always).toList = MetaClassifier.toTokens MetaClassifier.always := by decide set_option maxRecDepth 2000 in theorem tokenize_render_classifier_never : tokenize (MetaClassifier.toLeanSource MetaClassifier.never).toList = MetaClassifier.toTokens MetaClassifier.never := by decide set_option maxRecDepth 2000 in theorem tokenize_render_cterm_empty : tokenize (MetaCTerm.toLeanSource MetaCTerm.empty).toList = MetaCTerm.toTokens MetaCTerm.empty := by decide set_option maxRecDepth 2000 in theorem tokenize_render_artifact_empty : tokenize (MetaArtifact.toLeanSource MetaArtifact.empty).toList = MetaArtifact.toTokens MetaArtifact.empty := by decide -- ── Phase 3 distribution lemmas — open question ──────────────────────────── -- Proving `tokenize ∘ render = toTokens` requires four substantial -- distribution lemmas: -- -- readIdent_app : reading an ident sequence followed by a clean -- boundary returns the accumulated ident. -- readStrLit_app: reading an escapeStrLit-encoded body returns -- the original String. -- tokenize_app_clean: tokenize distributes over a concatenation -- where the prefix ends "cleanly". -- tokenize_render_X: induction over each meta-mirror type. -- -- Each requires reasoning about Lean 4.30's `String` API (UTF-8 -- ByteArray-backed) — specifically the equation -- `s.push c ++ xs.asString = s ++ (c :: xs).asString`, which is -- *not* definitionally true and needs a String-internals lemma. -- -- Cleaner paths forward: (a) refactor `readIdent`/`readStrLit` to -- accumulate into a `List Char` rather than a `String`, then build -- the String at the end via `List.asString` — decouples the proof -- from String internals; or (b) import Mathlib's richer String -- API which provides the needed lemmas. -- ── Round-trip — atomic kernel-reducible witnesses ───────────────────────── -- For non-recursive shapes the round-trip is closed by `rfl` (or -- `decide`) directly: rendering produces a fixed string, tokenising -- produces a fixed token list, parsing returns the original. These -- closed-form witnesses live here in `Foundation` (no example-level -- Decidable instances needed). theorem MetaCTerm.toLeanSource_empty_round_trip : MetaCTerm.fromLeanSource? MetaCTerm.empty.toLeanSource = some MetaCTerm.empty := by decide theorem MetaClassifier.toLeanSource_always_round_trip : MetaClassifier.fromLeanSource? MetaClassifier.always.toLeanSource = some MetaClassifier.always := by decide theorem MetaClassifier.toLeanSource_never_round_trip : MetaClassifier.fromLeanSource? MetaClassifier.never.toLeanSource = some MetaClassifier.never := by decide theorem MetaArtifact.toLeanSource_empty_round_trip_render : (MetaArtifact.fromLeanSource? MetaArtifact.empty.toLeanSource).map MetaArtifact.toLeanSource = some MetaArtifact.empty.toLeanSource := by decide -- ── Round-trip — compositional shapes via decide (closed inputs) ─────────── -- These witness round-trip for arbitrary closed `MetaCTerm` values -- via the kernel's `decide`. Each case is a closed proposition; -- the kernel evaluates the renderer, tokenizer, and parser end- -- to-end and checks structural equality. theorem MetaCTerm.toLeanSource_ident_anonymous_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.ident Lean.Name.anonymous).toLeanSource = some (MetaCTerm.ident Lean.Name.anonymous) := by decide theorem MetaCTerm.toLeanSource_app_empty_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.app .empty .empty).toLeanSource = some (MetaCTerm.app .empty .empty) := by decide set_option maxRecDepth 4000 in theorem MetaCTerm.toLeanSource_lam_x_empty_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.lam "x" .empty).toLeanSource = some (MetaCTerm.lam "x" .empty) := by decide set_option maxRecDepth 4000 in theorem MetaCTerm.toLeanSource_comp_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.comp "i" .empty .always .empty .empty).toLeanSource = some (MetaCTerm.comp "i" .empty .always .empty .empty) := by decide set_option maxRecDepth 4000 in theorem MetaCTerm.toLeanSource_transp_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.transp "i" .empty .never .empty).toLeanSource = some (MetaCTerm.transp "i" .empty .never .empty) := by decide set_option maxRecDepth 4000 in theorem MetaCTerm.toLeanSource_plam_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.plam "i" .empty).toLeanSource = some (MetaCTerm.plam "i" .empty) := by decide set_option maxRecDepth 4000 in theorem MetaCTerm.toLeanSource_sym_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.sym "x").toLeanSource = some (MetaCTerm.sym "x") := by decide -- Compositional witness: a `.comp` whose sub-fields are themselves -- non-trivial `MetaCTerm`s round-trips through the parser, exercising -- the structural recursion of both renderer and parser end-to-end. set_option maxRecDepth 16000 in theorem MetaCTerm.toLeanSource_nested_round_trip : MetaCTerm.fromLeanSource? (MetaCTerm.comp "i" (.ident `Univ) .always (.lam "x" (.sym "y")) (.sym "z")).toLeanSource = some (MetaCTerm.comp "i" (.ident `Univ) .always (.lam "x" (.sym "y")) (.sym "z")) := by decide set_option maxRecDepth 4000 in theorem MetaClassifier.toLeanSource_meet_round_trip : MetaClassifier.fromLeanSource? (MetaClassifier.meet .always .never).toLeanSource = some (MetaClassifier.meet .always .never) := by decide set_option maxRecDepth 4000 in theorem MetaClassifier.toLeanSource_atDecl_round_trip : MetaClassifier.fromLeanSource? (MetaClassifier.atDecl `Foo).toLeanSource = some (MetaClassifier.atDecl `Foo) := by decide -- 3-level deep name regression test: `eq0.i` (the FaceFormula -- `eq0` encoding) round-trips. Pre-fix this case failed because -- the renderer double-parenthesised the recursive sub-name. set_option maxRecDepth 4000 in theorem MetaClassifier.toLeanSource_atDecl_three_level_round_trip : MetaClassifier.fromLeanSource? (MetaClassifier.atDecl (Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")).toLeanSource = some (MetaClassifier.atDecl (Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")) := by decide end Infoductor