infoductor/Infoductor/Foundation/MetaParse.lean

/-
  Infoductor.Foundation.MetaParse — the inverse of `toLeanSource`
  =================================================================
  A hand-written tokenizer + recursive-descent parser that reads
  the Lean source emitted by the `toLeanSource` family of renderers
  and reconstructs the original `MetaCTerm` / `MetaClassifier` /
  `MetaArtifact` / `Lean.Name`.

  The bridge's loop closes at the *string* level, in Lean itself:
  no external elaborator needed.

  Implementation strategy:

  · `Token` is a four-arm inductive (parens, ident chains, string
    literals, num literals).
  · `tokenize` is fuel-based, so it's structurally recursive on
    the fuel parameter (no `partial` needed) — fuel is the input
    list length, which is always sufficient.
  · Parsers per type are likewise fuel-based.  Each successful
    parse consumes ≥ 1 token, so fuel = `tokens.length + 1` is
    always enough for any well-formed input.
  · String escape: only `"` and `\` are escaped (renderer-side
    `escapeStrLit` mirrors this exactly).
-/

import Infoductor.Foundation.Meta

namespace Infoductor

-- ── Tokens ──────────────────────────────────────────────────────────────────

inductive Token where
  | lparen  : Token
  | rparen  : Token
  | ident   : String → Token
  | strLit  : String → Token
  | numLit  : Nat    → Token
  deriving Repr, DecidableEq

-- ── Character predicates ───────────────────────────────────────────────────

def isIdentStartChar (c : Char) : Bool :=
  c.isAlpha || c == '_'

def isIdentRestChar (c : Char) : Bool :=
  c.isAlpha || c.isDigit || c == '_' || c == '.'

def isWhitespace (c : Char) : Bool :=
  c == ' ' || c == '\n' || c == '\t' || c == '\r'

-- ── Tokenizer helpers (fuel-based for kernel termination) ──────────────────

/-- Read identifier chars until a non-identifier char.  Returns
    (read string, remaining chars).  Fuel-based for kernel
    termination; the tokenizer feeds in `chars.length` fuel
    which is always sufficient. -/
def readIdent : Nat → List Char → String → String × List Char
  | 0,     chars,        acc => (acc, chars)
  | _+1,   [],           acc => (acc, [])
  | n+1,   c :: rest,    acc =>
      if isIdentRestChar c then readIdent n rest (acc.push c)
      else (acc, c :: rest)

/-- Read a decimal number.  Fuel-based. -/
def readNum : Nat → List Char → Nat → Nat × List Char
  | 0,     chars,       acc => (acc, chars)
  | _+1,   [],          acc => (acc, [])
  | n+1,   c :: rest,   acc =>
      if c.isDigit then
        readNum n rest (acc * 10 + (c.toNat - '0'.toNat))
      else (acc, c :: rest)

/-- Read characters of a string literal until the closing `"`,
    handling `\"` and `\\` escapes.  Fuel-based. -/
def readStrLit : Nat → List Char → String → Option (String × List Char)
  | 0,     _,                               _   => none
  | _+1,   [],                              _   => none
  | _+1,   '"' :: rest,                     acc => some (acc, rest)
  | n+1,   '\\' :: '"' :: rest,             acc => readStrLit n rest (acc.push '"')
  | n+1,   '\\' :: '\\' :: rest,            acc => readStrLit n rest (acc.push '\\')
  | n+1,   c :: rest,                       acc => readStrLit n rest (acc.push c)

-- `escapeStrLit` lives in Foundation.Meta now (used by both
-- the renderer and the parser); see Meta.lean for the definition.

-- ── Tokenizer ──────────────────────────────────────────────────────────────

/-- Tokenize a `List Char` into `List Token`.  Fuel-based for
    kernel termination; the top-level `tokenize` provides
    `chars.length + 1` fuel, which is always sufficient. -/
def tokenizeAux : Nat → List Char → List Token
  | 0,     _              => []
  | _+1,   []             => []
  | n+1,   '(' :: rest    => Token.lparen :: tokenizeAux n rest
  | n+1,   ')' :: rest    => Token.rparen :: tokenizeAux n rest
  | n+1,   '"' :: rest    =>
      match readStrLit (n+1) rest "" with
      | some (s, rest') => Token.strLit s :: tokenizeAux n rest'
      | none            => []
  | n+1,   c :: rest      =>
      if isWhitespace c then tokenizeAux n rest
      else if c.isDigit then
        let (num, rest') := readNum (n+1) (c :: rest) 0
        Token.numLit num :: tokenizeAux n rest'
      else if isIdentStartChar c then
        let (ident, rest') := readIdent (n+1) (c :: rest) ""
        Token.ident ident :: tokenizeAux n rest'
      else
        []

/-- Top-level tokenizer.  Fuel = `chars.length + 1` is always
    sufficient since each token-emitting branch peels ≥ 1 char. -/
def tokenize (chars : List Char) : List Token :=
  tokenizeAux (chars.length + 1) chars

/-- Tokenize a `String` directly. -/
def tokenizeStr (s : String) : List Token :=
  tokenize s.toList

-- ── Parsers ────────────────────────────────────────────────────────────────
-- All parsers fuel-based.  Recursion structurally on the Nat
-- parameter, so they're total without `partial`.

/-- Parse a `Lean.Name`.  Recognises:
      · `Lean.Name.anonymous` (atomic)
      · `(Lean.Name.str <name> "<str>")`  (recursive)
      · `(Lean.Name.num <name> <nat>)`    (recursive)

    The renderer no longer wraps recursive sub-name calls in
    extra parens (post-fix), so the parser handles only the
    canonical bare-atomic + constructor-app forms. -/
def parseName?Aux : Nat → List Token → Option (Lean.Name × List Token)
  | 0,     _    => none
  | _+1,   Token.ident "Lean.Name.anonymous" :: rest =>
      some (Lean.Name.anonymous, rest)
  | n+1,   Token.lparen :: Token.ident "Lean.Name.str" :: rest =>
      match parseName?Aux n rest with
      | some (p, Token.strLit s :: Token.rparen :: rest') =>
          some (Lean.Name.str p s, rest')
      | _ => none
  | n+1,   Token.lparen :: Token.ident "Lean.Name.num" :: rest =>
      match parseName?Aux n rest with
      | some (p, Token.numLit k :: Token.rparen :: rest') =>
          some (Lean.Name.num p k, rest')
      | _ => none
  | _, _ => none

/-- Parse a `MetaClassifier`. -/
def parseClassifier?Aux : Nat → List Token → Option (MetaClassifier × List Token)
  | 0,    _ => none
  | _+1,  Token.ident "Infoductor.MetaClassifier.always" :: rest =>
      some (MetaClassifier.always, rest)
  | _+1,  Token.ident "Infoductor.MetaClassifier.never" :: rest =>
      some (MetaClassifier.never, rest)
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaClassifier.atDecl" :: rest =>
      match parseName?Aux n rest with
      | some (nm, Token.rparen :: rest') => some (MetaClassifier.atDecl nm, rest')
      | _ => none
  | _+1,  Token.lparen :: Token.ident "Infoductor.MetaClassifier.inFile" :: rest =>
      match rest with
      | Token.strLit s :: Token.rparen :: rest' =>
          some (MetaClassifier.inFile s, rest')
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaClassifier.underAttribute" :: rest =>
      match parseName?Aux n rest with
      | some (nm, Token.rparen :: rest') =>
          some (MetaClassifier.underAttribute nm, rest')
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaClassifier.dependencyOf" :: rest =>
      match parseName?Aux n rest with
      | some (nm, Token.rparen :: rest') =>
          some (MetaClassifier.dependencyOf nm, rest')
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaClassifier.inNamespace" :: rest =>
      match parseName?Aux n rest with
      | some (nm, Token.rparen :: rest') =>
          some (MetaClassifier.inNamespace nm, rest')
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaClassifier.meet" :: rest =>
      match parseClassifier?Aux n rest with
      | some (a, rest') =>
          match parseClassifier?Aux n rest' with
          | some (b, Token.rparen :: rest'') =>
              some (MetaClassifier.meet a b, rest'')
          | _ => none
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaClassifier.join" :: rest =>
      match parseClassifier?Aux n rest with
      | some (a, rest') =>
          match parseClassifier?Aux n rest' with
          | some (b, Token.rparen :: rest'') =>
              some (MetaClassifier.join a b, rest'')
          | _ => none
      | _ => none
  | _, _ => none

/-- Parse a `MetaCTerm`. -/
def parseMetaCTerm?Aux : Nat → List Token → Option (MetaCTerm × List Token)
  | 0,    _ => none
  | _+1,  Token.ident "Infoductor.MetaCTerm.empty" :: rest =>
      some (MetaCTerm.empty, rest)
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaCTerm.ident" :: rest =>
      match parseName?Aux n rest with
      | some (nm, Token.rparen :: rest') => some (MetaCTerm.ident nm, rest')
      | _ => none
  | _+1,  Token.lparen :: Token.ident "Infoductor.MetaCTerm.sym" :: rest =>
      match rest with
      | Token.strLit s :: Token.rparen :: rest' =>
          some (MetaCTerm.sym s, rest')
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaCTerm.app" :: rest =>
      match parseMetaCTerm?Aux n rest with
      | some (f, rest') =>
          match parseMetaCTerm?Aux n rest' with
          | some (a, Token.rparen :: rest'') =>
              some (MetaCTerm.app f a, rest'')
          | _ => none
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaCTerm.lam" :: rest =>
      match rest with
      | Token.strLit x :: rest' =>
          match parseMetaCTerm?Aux n rest' with
          | some (t, Token.rparen :: rest'') => some (MetaCTerm.lam x t, rest'')
          | _ => none
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaCTerm.plam" :: rest =>
      match rest with
      | Token.strLit i :: rest' =>
          match parseMetaCTerm?Aux n rest' with
          | some (t, Token.rparen :: rest'') => some (MetaCTerm.plam i t, rest'')
          | _ => none
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaCTerm.comp" :: rest =>
      match rest with
      | Token.strLit s :: rest1 =>
          match parseMetaCTerm?Aux n rest1 with
          | some (A, rest2) =>
              match parseClassifier?Aux n rest2 with
              | some (φ, rest3) =>
                  match parseMetaCTerm?Aux n rest3 with
                  | some (u, rest4) =>
                      match parseMetaCTerm?Aux n rest4 with
                      | some (t, Token.rparen :: rest5) =>
                          some (MetaCTerm.comp s A φ u t, rest5)
                      | _ => none
                  | _ => none
              | _ => none
          | _ => none
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaCTerm.transp" :: rest =>
      match rest with
      | Token.strLit s :: rest1 =>
          match parseMetaCTerm?Aux n rest1 with
          | some (A, rest2) =>
              match parseClassifier?Aux n rest2 with
              | some (φ, rest3) =>
                  match parseMetaCTerm?Aux n rest3 with
                  | some (t, Token.rparen :: rest4) =>
                      some (MetaCTerm.transp s A φ t, rest4)
                  | _ => none
              | _ => none
          | _ => none
      | _ => none
  | _, _ => none

/-- Parse a `MetaArtifact`. -/
def parseArtifact?Aux : Nat → List Token → Option (MetaArtifact × List Token)
  | 0,    _ => none
  | _+1,  Token.ident "Infoductor.MetaArtifact.empty" :: rest =>
      some (MetaArtifact.empty, rest)
  | _+1,  Token.lparen :: Token.ident "Infoductor.MetaArtifact.source" :: rest =>
      match rest with
      | Token.strLit s :: Token.rparen :: rest' =>
          some (MetaArtifact.source s, rest')
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaArtifact.refTo" :: rest =>
      match parseName?Aux n rest with
      | some (nm, Token.rparen :: rest') =>
          some (MetaArtifact.refTo nm, rest')
      | _ => none
  | n+1,  Token.lparen :: Token.ident "Infoductor.MetaArtifact.cterm" :: rest =>
      match parseMetaCTerm?Aux n rest with
      | some (m, Token.rparen :: rest') =>
          some (MetaArtifact.cterm m, rest')
      | _ => none
  | _, _ => none

-- ── Top-level wrappers ─────────────────────────────────────────────────────
-- Fuel = `tokens.length + 1` is always sufficient: every successful
-- parse arm consumes ≥ 1 token, so depth is bounded by length.

def parseName? (tokens : List Token) : Option (Lean.Name × List Token) :=
  parseName?Aux (tokens.length + 1) tokens

def parseClassifier? (tokens : List Token) : Option (MetaClassifier × List Token) :=
  parseClassifier?Aux (tokens.length + 1) tokens

def parseMetaCTerm? (tokens : List Token) : Option (MetaCTerm × List Token) :=
  parseMetaCTerm?Aux (tokens.length + 1) tokens

def parseArtifact? (tokens : List Token) : Option (MetaArtifact × List Token) :=
  parseArtifact?Aux (tokens.length + 1) tokens

def MetaCTerm.fromLeanSource? (s : String) : Option MetaCTerm :=
  match parseMetaCTerm? (tokenizeStr s) with
  | some (t, []) => some t
  | _            => none

def MetaClassifier.fromLeanSource? (s : String) : Option MetaClassifier :=
  match parseClassifier? (tokenizeStr s) with
  | some (φ, []) => some φ
  | _            => none

def MetaArtifact.fromLeanSource? (s : String) : Option MetaArtifact :=
  match parseArtifact? (tokenizeStr s) with
  | some (a, []) => some a
  | _            => none

-- ── Canonical token forms — direct meta-mirror → List Token ────────────────
-- Each function mirrors the corresponding `toLeanSource` renderer
-- but produces tokens directly, bypassing the String layer.  These
-- are the canonical token forms against which parser correctness
-- is proven.  The relationship `tokenize ∘ toLeanSource = toTokens`
-- is established separately as a String-level lemma.

def nameToTokens : Lean.Name → List Token
  | .anonymous => [Token.ident "Lean.Name.anonymous"]
  | .str p s   =>
      [Token.lparen, Token.ident "Lean.Name.str"] ++ nameToTokens p ++
        [Token.strLit s, Token.rparen]
  | .num p k   =>
      [Token.lparen, Token.ident "Lean.Name.num"] ++ nameToTokens p ++
        [Token.numLit k, Token.rparen]

def MetaClassifier.toTokens : MetaClassifier → List Token
  | .always         => [Token.ident "Infoductor.MetaClassifier.always"]
  | .never          => [Token.ident "Infoductor.MetaClassifier.never"]
  | .atDecl n       =>
      [Token.lparen, Token.ident "Infoductor.MetaClassifier.atDecl"] ++
        nameToTokens n ++ [Token.rparen]
  | .inFile s       =>
      [Token.lparen, Token.ident "Infoductor.MetaClassifier.inFile",
        Token.strLit s, Token.rparen]
  | .underAttribute n =>
      [Token.lparen, Token.ident "Infoductor.MetaClassifier.underAttribute"] ++
        nameToTokens n ++ [Token.rparen]
  | .dependencyOf n =>
      [Token.lparen, Token.ident "Infoductor.MetaClassifier.dependencyOf"] ++
        nameToTokens n ++ [Token.rparen]
  | .inNamespace n  =>
      [Token.lparen, Token.ident "Infoductor.MetaClassifier.inNamespace"] ++
        nameToTokens n ++ [Token.rparen]
  | .meet a b       =>
      [Token.lparen, Token.ident "Infoductor.MetaClassifier.meet"] ++
        toTokens a ++ toTokens b ++ [Token.rparen]
  | .join a b       =>
      [Token.lparen, Token.ident "Infoductor.MetaClassifier.join"] ++
        toTokens a ++ toTokens b ++ [Token.rparen]

def MetaCTerm.toTokens : MetaCTerm → List Token
  | .ident n        =>
      [Token.lparen, Token.ident "Infoductor.MetaCTerm.ident"] ++
        nameToTokens n ++ [Token.rparen]
  | .sym s          =>
      [Token.lparen, Token.ident "Infoductor.MetaCTerm.sym",
        Token.strLit s, Token.rparen]
  | .app f a        =>
      [Token.lparen, Token.ident "Infoductor.MetaCTerm.app"] ++
        toTokens f ++ toTokens a ++ [Token.rparen]
  | .lam x t        =>
      [Token.lparen, Token.ident "Infoductor.MetaCTerm.lam",
        Token.strLit x] ++ toTokens t ++ [Token.rparen]
  | .plam i t       =>
      [Token.lparen, Token.ident "Infoductor.MetaCTerm.plam",
        Token.strLit i] ++ toTokens t ++ [Token.rparen]
  | .comp s A φ u t =>
      [Token.lparen, Token.ident "Infoductor.MetaCTerm.comp",
        Token.strLit s] ++ toTokens A ++ φ.toTokens ++
        toTokens u ++ toTokens t ++ [Token.rparen]
  | .transp s A φ t =>
      [Token.lparen, Token.ident "Infoductor.MetaCTerm.transp",
        Token.strLit s] ++ toTokens A ++ φ.toTokens ++
        toTokens t ++ [Token.rparen]
  | .empty          => [Token.ident "Infoductor.MetaCTerm.empty"]

def MetaArtifact.toTokens : MetaArtifact → List Token
  | .source s => [Token.lparen, Token.ident "Infoductor.MetaArtifact.source",
                  Token.strLit s, Token.rparen]
  | .declAt _ => []  -- not source-renderable; toTokens undefined for this arm
  | .cterm m  => [Token.lparen, Token.ident "Infoductor.MetaArtifact.cterm"] ++
                 m.toTokens ++ [Token.rparen]
  | .refTo n  => [Token.lparen, Token.ident "Infoductor.MetaArtifact.refTo"] ++
                 nameToTokens n ++ [Token.rparen]
  | .empty    => [Token.ident "Infoductor.MetaArtifact.empty"]

-- ── Size measures (for fuel bookkeeping in proofs) ─────────────────────────

def nameDepth : Lean.Name → Nat
  | .anonymous => 1
  | .str p _   => nameDepth p + 1
  | .num p _   => nameDepth p + 1

def classifierDepth : MetaClassifier → Nat
  | .always | .never => 1
  | .atDecl n         => nameDepth n + 1
  | .inFile _         => 2
  | .underAttribute n => nameDepth n + 1
  | .dependencyOf n   => nameDepth n + 1
  | .inNamespace n    => nameDepth n + 1
  | .meet a b         => max (classifierDepth a) (classifierDepth b) + 1
  | .join a b         => max (classifierDepth a) (classifierDepth b) + 1

def cTermDepth : MetaCTerm → Nat
  | .ident n => nameDepth n + 1
  | .sym _   => 2
  | .empty   => 1
  | .lam _ t | .plam _ t => cTermDepth t + 1
  | .app f a => max (cTermDepth f) (cTermDepth a) + 1
  | .comp _ A φ u t =>
      max (max (cTermDepth A) (classifierDepth φ))
          (max (cTermDepth u) (cTermDepth t)) + 1
  | .transp _ A φ t =>
      max (max (cTermDepth A) (classifierDepth φ)) (cTermDepth t) + 1

def artifactDepth : MetaArtifact → Nat
  | .source _ => 2
  | .declAt _ => 0  -- not source-encodable; round-trip excluded
  | .cterm m  => cTermDepth m + 1
  | .refTo n  => nameDepth n + 1
  | .empty    => 1

/-- A `MetaArtifact` is *supported* by the round-trip iff it does
    not carry a `Lean.Syntax` (which cannot be source-rendered).
    All other arms are supported. -/
def MetaArtifact.supported : MetaArtifact → Bool
  | .declAt _ => false
  | _         => true

-- ── Parser correctness on toTokens (Phase 2) ──────────────────────────────
-- Universal theorems witnessing that each parser, when fed the
-- canonical token form of a value plus arbitrary trailing tokens,
-- reconstructs the value exactly and leaves the trailing tokens
-- untouched.  Provided fuel is at least the depth of the value.
-- Proofs are by structural induction on the meta-mirror; no
-- String reasoning is needed.

theorem parseName?Aux_correct :
    ∀ (n : Lean.Name) (rest : List Token) (fuel : Nat),
      fuel ≥ nameDepth n →
      parseName?Aux fuel (nameToTokens n ++ rest) = some (n, rest)
  | .anonymous, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [nameDepth] at h
      · simp [nameToTokens, parseName?Aux]
  | .str p s, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [nameDepth] at h
      · simp only [nameToTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseName?Aux]
        have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega
        have ih := parseName?Aux_correct p
                    (Token.strLit s :: Token.rparen :: rest) f hp
        rw [ih]
  | .num p k, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [nameDepth] at h
      · simp only [nameToTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseName?Aux]
        have hp : f ≥ nameDepth p := by simp [nameDepth] at h; omega
        have ih := parseName?Aux_correct p
                    (Token.numLit k :: Token.rparen :: rest) f hp
        rw [ih]

theorem parseClassifier?Aux_correct :
    ∀ (φ : MetaClassifier) (rest : List Token) (fuel : Nat),
      fuel ≥ classifierDepth φ →
      parseClassifier?Aux fuel (φ.toTokens ++ rest) = some (φ, rest)
  | .always, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp [MetaClassifier.toTokens, parseClassifier?Aux]
  | .never, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp [MetaClassifier.toTokens, parseClassifier?Aux]
  | .atDecl n, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseClassifier?Aux]
        have hn : f ≥ nameDepth n := by simp [classifierDepth] at h; omega
        rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
  | .inFile s, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp [MetaClassifier.toTokens, parseClassifier?Aux]
  | .underAttribute n, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseClassifier?Aux]
        have hn : f ≥ nameDepth n := by
          simp [classifierDepth] at h; omega
        rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
  | .dependencyOf n, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseClassifier?Aux]
        have hn : f ≥ nameDepth n := by
          simp [classifierDepth] at h; omega
        rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
  | .inNamespace n, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseClassifier?Aux]
        have hn : f ≥ nameDepth n := by
          simp [classifierDepth] at h; omega
        rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
  | .meet a b, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseClassifier?Aux]
        have ha : f ≥ classifierDepth a := by
          simp [classifierDepth] at h; omega
        have hb : f ≥ classifierDepth b := by
          simp [classifierDepth] at h; omega
        simp only [parseClassifier?Aux_correct a _ f ha,
                   parseClassifier?Aux_correct b _ f hb]
  | .join a b, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [classifierDepth] at h
      · simp only [MetaClassifier.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseClassifier?Aux]
        have ha : f ≥ classifierDepth a := by
          simp [classifierDepth] at h; omega
        have hb : f ≥ classifierDepth b := by
          simp [classifierDepth] at h; omega
        simp only [parseClassifier?Aux_correct a _ f ha,
                   parseClassifier?Aux_correct b _ f hb]

theorem parseMetaCTerm?Aux_correct :
    ∀ (t : MetaCTerm) (rest : List Token) (fuel : Nat),
      fuel ≥ cTermDepth t →
      parseMetaCTerm?Aux fuel (t.toTokens ++ rest) = some (t, rest)
  | .empty, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp [MetaCTerm.toTokens, parseMetaCTerm?Aux]
  | .ident n, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseMetaCTerm?Aux]
        have hn : f ≥ nameDepth n := by simp [cTermDepth] at h; omega
        rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
  | .sym s, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp [MetaCTerm.toTokens, parseMetaCTerm?Aux]
  | .app fa aa, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseMetaCTerm?Aux]
        have hf : f ≥ cTermDepth fa := by simp [cTermDepth] at h; omega
        have ha : f ≥ cTermDepth aa := by simp [cTermDepth] at h; omega
        simp only [parseMetaCTerm?Aux_correct fa _ f hf,
                   parseMetaCTerm?Aux_correct aa _ f ha]
  | .lam x t, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseMetaCTerm?Aux]
        have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
        simp only [parseMetaCTerm?Aux_correct t _ f ht]
  | .plam i t, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseMetaCTerm?Aux]
        have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
        simp only [parseMetaCTerm?Aux_correct t _ f ht]
  | .comp s A φ u t, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseMetaCTerm?Aux]
        have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega
        have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega
        have hu : f ≥ cTermDepth u := by simp [cTermDepth] at h; omega
        have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
        simp only [parseMetaCTerm?Aux_correct A _ f hA,
                   parseClassifier?Aux_correct φ _ f hφ,
                   parseMetaCTerm?Aux_correct u _ f hu,
                   parseMetaCTerm?Aux_correct t _ f ht]
  | .transp s A φ t, rest, fuel, h => by
      rcases fuel with _ | f
      · simp [cTermDepth] at h
      · simp only [MetaCTerm.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseMetaCTerm?Aux]
        have hA : f ≥ cTermDepth A := by simp [cTermDepth] at h; omega
        have hφ : f ≥ classifierDepth φ := by simp [cTermDepth] at h; omega
        have ht : f ≥ cTermDepth t := by simp [cTermDepth] at h; omega
        simp only [parseMetaCTerm?Aux_correct A _ f hA,
                   parseClassifier?Aux_correct φ _ f hφ,
                   parseMetaCTerm?Aux_correct t _ f ht]

theorem parseArtifact?Aux_correct :
    ∀ (a : MetaArtifact) (rest : List Token) (fuel : Nat),
      a.supported = true →
      fuel ≥ artifactDepth a →
      parseArtifact?Aux fuel (a.toTokens ++ rest) = some (a, rest)
  | .empty, rest, fuel, _, h => by
      rcases fuel with _ | f
      · simp [artifactDepth] at h
      · simp [MetaArtifact.toTokens, parseArtifact?Aux]
  | .source s, rest, fuel, _, h => by
      rcases fuel with _ | f
      · simp [artifactDepth] at h
      · simp [MetaArtifact.toTokens, parseArtifact?Aux]
  | .refTo n, rest, fuel, _, h => by
      rcases fuel with _ | f
      · simp [artifactDepth] at h
      · simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseArtifact?Aux]
        have hn : f ≥ nameDepth n := by simp [artifactDepth] at h; omega
        rw [parseName?Aux_correct n (Token.rparen :: rest) f hn]
  | .cterm m, rest, fuel, _, h => by
      rcases fuel with _ | f
      · simp [artifactDepth] at h
      · simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append,
                   List.nil_append, parseArtifact?Aux]
        have hm : f ≥ cTermDepth m := by simp [artifactDepth] at h; omega
        simp only [parseMetaCTerm?Aux_correct m _ f hm]
  | .declAt _, _, _, hsup, _ => by
      simp [MetaArtifact.supported] at hsup

-- ── Token-level round-trip — the canonical universal theorems ─────────────
-- These follow directly from the `parser?Aux_correct` lemmas above
-- by setting `rest = []` and `fuel = depth + k` (any sufficient
-- fuel).  They're the cleanest universal statements: the parser,
-- starting from a value's canonical token form, recovers the
-- value with no leftover tokens.  No String reasoning needed.

/-- Top-level token-entry parser.  Tokenisation is presentation;
    this is the *algorithmic* round-trip's input. -/
def MetaCTerm.fromTokens? (tokens : List Token) : Option MetaCTerm :=
  match parseMetaCTerm? tokens with
  | some (t, []) => some t
  | _            => none

def MetaClassifier.fromTokens? (tokens : List Token) : Option MetaClassifier :=
  match parseClassifier? tokens with
  | some (φ, []) => some φ
  | _            => none

def Infoductor.nameFromTokens? (tokens : List Token) : Option Lean.Name :=
  match parseName? tokens with
  | some (n, []) => some n
  | _            => none

def MetaArtifact.fromTokens? (tokens : List Token) : Option MetaArtifact :=
  match parseArtifact? tokens with
  | some (a, []) => some a
  | _            => none

-- Length-vs-depth lemmas: each toTokens output is at least as long
-- as the depth of the value, so `tokens.length + 1` is always
-- sufficient fuel.

theorem nameToTokens_length_bound (n : Lean.Name) :
    (nameToTokens n).length + 1 ≥ nameDepth n := by
  induction n with
  | anonymous => simp [nameToTokens, nameDepth]
  | str p s ih => simp [nameToTokens, nameDepth]; omega
  | num p k ih => simp [nameToTokens, nameDepth]; omega

theorem classifierToTokens_length_bound (φ : MetaClassifier) :
    (φ.toTokens).length + 1 ≥ classifierDepth φ := by
  induction φ with
  | always => simp [MetaClassifier.toTokens, classifierDepth]
  | never => simp [MetaClassifier.toTokens, classifierDepth]
  | atDecl n =>
      simp [MetaClassifier.toTokens, classifierDepth]
      have := nameToTokens_length_bound n; omega
  | inFile s => simp [MetaClassifier.toTokens, classifierDepth]
  | underAttribute n =>
      simp [MetaClassifier.toTokens, classifierDepth]
      have := nameToTokens_length_bound n; omega
  | dependencyOf n =>
      simp [MetaClassifier.toTokens, classifierDepth]
      have := nameToTokens_length_bound n; omega
  | inNamespace n =>
      simp [MetaClassifier.toTokens, classifierDepth]
      have := nameToTokens_length_bound n; omega
  | meet a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega
  | join a b iha ihb => simp [MetaClassifier.toTokens, classifierDepth]; omega

theorem cTermToTokens_length_bound (t : MetaCTerm) :
    (t.toTokens).length + 1 ≥ cTermDepth t := by
  induction t with
  | empty => simp [MetaCTerm.toTokens, cTermDepth]
  | ident n =>
      simp [MetaCTerm.toTokens, cTermDepth]
      have := nameToTokens_length_bound n; omega
  | sym s => simp [MetaCTerm.toTokens, cTermDepth]
  | app f a ihf iha => simp [MetaCTerm.toTokens, cTermDepth]; omega
  | lam x t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega
  | plam i t ih => simp [MetaCTerm.toTokens, cTermDepth]; omega
  | comp s A φ u t ihA ihu iht =>
      simp [MetaCTerm.toTokens, cTermDepth]
      have := classifierToTokens_length_bound φ
      omega
  | transp s A φ t ihA iht =>
      simp [MetaCTerm.toTokens, cTermDepth]
      have := classifierToTokens_length_bound φ
      omega

/-- Every meta-mirror value's canonical token form parses back to
    the same value.  Universal — by structural induction on the
    meta-mirror type, with sufficient fuel guaranteed by the
    length-vs-depth lemma above. -/
theorem nameFromTokens?_round_trip (n : Lean.Name) :
    Infoductor.nameFromTokens? (nameToTokens n) = some n := by
  unfold Infoductor.nameFromTokens? parseName?
  have h := nameToTokens_length_bound n
  have := parseName?Aux_correct n [] ((nameToTokens n).length + 1) h
  rw [List.append_nil] at this
  rw [this]

theorem classifierFromTokens?_round_trip (φ : MetaClassifier) :
    MetaClassifier.fromTokens? φ.toTokens = some φ := by
  unfold MetaClassifier.fromTokens? parseClassifier?
  have h := classifierToTokens_length_bound φ
  have := parseClassifier?Aux_correct φ [] ((φ.toTokens).length + 1) h
  rw [List.append_nil] at this
  rw [this]

theorem cTermFromTokens?_round_trip (t : MetaCTerm) :
    MetaCTerm.fromTokens? t.toTokens = some t := by
  unfold MetaCTerm.fromTokens? parseMetaCTerm?
  have h := cTermToTokens_length_bound t
  have := parseMetaCTerm?Aux_correct t [] ((t.toTokens).length + 1) h
  rw [List.append_nil] at this
  rw [this]

theorem artifactFromTokens?_round_trip (a : MetaArtifact)
    (hsup : a.supported = true) :
    MetaArtifact.fromTokens? a.toTokens = some a := by
  unfold MetaArtifact.fromTokens? parseArtifact?
  -- artifact length bound
  have hlen : (a.toTokens).length + 1 ≥ artifactDepth a := by
    cases a with
    | empty => simp [MetaArtifact.toTokens, artifactDepth]
    | source s => simp [MetaArtifact.toTokens, artifactDepth]
    | declAt _ => simp [MetaArtifact.supported] at hsup
    | cterm m =>
        simp [MetaArtifact.toTokens, artifactDepth]
        have := cTermToTokens_length_bound m; omega
    | refTo n =>
        simp [MetaArtifact.toTokens, artifactDepth]
        have := nameToTokens_length_bound n; omega
  have := parseArtifact?Aux_correct a [] ((a.toTokens).length + 1) hsup hlen
  rw [List.append_nil] at this
  rw [this]

-- ── Phase 3: tokenize ∘ render = toTokens ─────────────────────────────────
-- The String-level half.  Foundation lemmas about tokenize's
-- behaviour, then induction over each meta-mirror type.

/-- `tokenize` on `(` :: rest reduces to `lparen :: tokenize rest`. -/
theorem tokenize_lparen_cons (rest : List Char) :
    tokenize ('(' :: rest) = Token.lparen :: tokenize rest := by
  simp [tokenize, tokenizeAux]

/-- `tokenize` on `)` :: rest reduces to `rparen :: tokenize rest`. -/
theorem tokenize_rparen_cons (rest : List Char) :
    tokenize (')' :: rest) = Token.rparen :: tokenize rest := by
  simp [tokenize, tokenizeAux]

/-- `tokenize` skips a leading space. -/
theorem tokenize_space_cons (rest : List Char) :
    tokenize (' ' :: rest) = tokenize rest := by
  simp [tokenize, tokenizeAux, isWhitespace]

-- Atomic-arm Phase 3 witnesses via `decide`.  Each closed input
-- reduces in the kernel to a concrete token list; we prove
-- structural identity between `tokenize (toLeanSource v).toList`
-- and `toTokens v` for these atomic shapes.

set_option maxRecDepth 2000 in
theorem tokenize_render_name_anonymous :
    tokenize (nameToLeanSource Lean.Name.anonymous).toList =
      nameToTokens Lean.Name.anonymous := by
  decide

set_option maxRecDepth 2000 in
theorem tokenize_render_classifier_always :
    tokenize (MetaClassifier.toLeanSource MetaClassifier.always).toList =
      MetaClassifier.toTokens MetaClassifier.always := by
  decide

set_option maxRecDepth 2000 in
theorem tokenize_render_classifier_never :
    tokenize (MetaClassifier.toLeanSource MetaClassifier.never).toList =
      MetaClassifier.toTokens MetaClassifier.never := by
  decide

set_option maxRecDepth 2000 in
theorem tokenize_render_cterm_empty :
    tokenize (MetaCTerm.toLeanSource MetaCTerm.empty).toList =
      MetaCTerm.toTokens MetaCTerm.empty := by
  decide

set_option maxRecDepth 2000 in
theorem tokenize_render_artifact_empty :
    tokenize (MetaArtifact.toLeanSource MetaArtifact.empty).toList =
      MetaArtifact.toTokens MetaArtifact.empty := by
  decide

-- ── Phase 3 distribution lemmas — open question ────────────────────────────
-- Proving `tokenize ∘ render = toTokens` requires four substantial
-- distribution lemmas:
--
--   readIdent_app : reading an ident sequence followed by a clean
--                   boundary returns the accumulated ident.
--   readStrLit_app: reading an escapeStrLit-encoded body returns
--                   the original String.
--   tokenize_app_clean: tokenize distributes over a concatenation
--                   where the prefix ends "cleanly".
--   tokenize_render_X: induction over each meta-mirror type.
--
-- Each requires reasoning about Lean 4.30's `String` API (UTF-8
-- ByteArray-backed) — specifically the equation
-- `s.push c ++ xs.asString = s ++ (c :: xs).asString`, which is
-- *not* definitionally true and needs a String-internals lemma.
--
-- Cleaner paths forward: (a) refactor `readIdent`/`readStrLit` to
-- accumulate into a `List Char` rather than a `String`, then build
-- the String at the end via `List.asString` — decouples the proof
-- from String internals; or (b) import Mathlib's richer String
-- API which provides the needed lemmas.

-- ── Round-trip — atomic kernel-reducible witnesses ─────────────────────────
-- For non-recursive shapes the round-trip is closed by `rfl` (or
-- `decide`) directly: rendering produces a fixed string, tokenising
-- produces a fixed token list, parsing returns the original.  These
-- closed-form witnesses live here in `Foundation` (no example-level
-- Decidable instances needed).

theorem MetaCTerm.toLeanSource_empty_round_trip :
    MetaCTerm.fromLeanSource? MetaCTerm.empty.toLeanSource = some MetaCTerm.empty := by
  decide

theorem MetaClassifier.toLeanSource_always_round_trip :
    MetaClassifier.fromLeanSource? MetaClassifier.always.toLeanSource =
      some MetaClassifier.always := by
  decide

theorem MetaClassifier.toLeanSource_never_round_trip :
    MetaClassifier.fromLeanSource? MetaClassifier.never.toLeanSource =
      some MetaClassifier.never := by
  decide

theorem MetaArtifact.toLeanSource_empty_round_trip_render :
    (MetaArtifact.fromLeanSource? MetaArtifact.empty.toLeanSource).map
      MetaArtifact.toLeanSource = some MetaArtifact.empty.toLeanSource := by
  decide

-- ── Round-trip — compositional shapes via decide (closed inputs) ───────────
-- These witness round-trip for arbitrary closed `MetaCTerm` values
-- via the kernel's `decide`.  Each case is a closed proposition;
-- the kernel evaluates the renderer, tokenizer, and parser end-
-- to-end and checks structural equality.

theorem MetaCTerm.toLeanSource_ident_anonymous_round_trip :
    MetaCTerm.fromLeanSource? (MetaCTerm.ident Lean.Name.anonymous).toLeanSource =
      some (MetaCTerm.ident Lean.Name.anonymous) := by
  decide

theorem MetaCTerm.toLeanSource_app_empty_round_trip :
    MetaCTerm.fromLeanSource? (MetaCTerm.app .empty .empty).toLeanSource =
      some (MetaCTerm.app .empty .empty) := by
  decide

set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_lam_x_empty_round_trip :
    MetaCTerm.fromLeanSource? (MetaCTerm.lam "x" .empty).toLeanSource =
      some (MetaCTerm.lam "x" .empty) := by
  decide

set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_comp_round_trip :
    MetaCTerm.fromLeanSource?
      (MetaCTerm.comp "i" .empty .always .empty .empty).toLeanSource =
      some (MetaCTerm.comp "i" .empty .always .empty .empty) := by
  decide

set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_transp_round_trip :
    MetaCTerm.fromLeanSource?
      (MetaCTerm.transp "i" .empty .never .empty).toLeanSource =
      some (MetaCTerm.transp "i" .empty .never .empty) := by
  decide

set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_plam_round_trip :
    MetaCTerm.fromLeanSource? (MetaCTerm.plam "i" .empty).toLeanSource =
      some (MetaCTerm.plam "i" .empty) := by
  decide

set_option maxRecDepth 4000 in
theorem MetaCTerm.toLeanSource_sym_round_trip :
    MetaCTerm.fromLeanSource? (MetaCTerm.sym "x").toLeanSource =
      some (MetaCTerm.sym "x") := by
  decide

-- Compositional witness: a `.comp` whose sub-fields are themselves
-- non-trivial `MetaCTerm`s round-trips through the parser, exercising
-- the structural recursion of both renderer and parser end-to-end.
set_option maxRecDepth 16000 in
theorem MetaCTerm.toLeanSource_nested_round_trip :
    MetaCTerm.fromLeanSource?
      (MetaCTerm.comp "i" (.ident `Univ) .always
        (.lam "x" (.sym "y")) (.sym "z")).toLeanSource =
      some (MetaCTerm.comp "i" (.ident `Univ) .always
        (.lam "x" (.sym "y")) (.sym "z")) := by
  decide

set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_meet_round_trip :
    MetaClassifier.fromLeanSource?
      (MetaClassifier.meet .always .never).toLeanSource =
      some (MetaClassifier.meet .always .never) := by
  decide

set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_atDecl_round_trip :
    MetaClassifier.fromLeanSource?
      (MetaClassifier.atDecl `Foo).toLeanSource =
      some (MetaClassifier.atDecl `Foo) := by
  decide

-- 3-level deep name regression test: `eq0.i` (the FaceFormula
-- `eq0` encoding) round-trips.  Pre-fix this case failed because
-- the renderer double-parenthesised the recursive sub-name.
set_option maxRecDepth 4000 in
theorem MetaClassifier.toLeanSource_atDecl_three_level_round_trip :
    MetaClassifier.fromLeanSource?
      (MetaClassifier.atDecl
        (Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")).toLeanSource =
      some (MetaClassifier.atDecl
        (Lean.Name.mkStr (Lean.Name.mkSimple "eq0") "i")) := by
  decide

end Infoductor