diff --git a/Infoductor/Foundation.lean b/Infoductor/Foundation.lean index 5a7e56d..053433e 100644 --- a/Infoductor/Foundation.lean +++ b/Infoductor/Foundation.lean @@ -7,6 +7,7 @@ -/ import Infoductor.Foundation.Meta +import Infoductor.Foundation.MetaParse import Infoductor.Foundation.Edit import Infoductor.Foundation.Restructure import Infoductor.Foundation.MacroAlias diff --git a/Infoductor/Foundation/Meta.lean b/Infoductor/Foundation/Meta.lean index 2bf3cd5..0fbe114 100644 --- a/Infoductor/Foundation/Meta.lean +++ b/Infoductor/Foundation/Meta.lean @@ -234,7 +234,7 @@ inductive MetaCTerm where content. Distinct from `MetaArtifact.empty` (which is at the artifact level). -/ | empty : MetaCTerm - deriving Repr + deriving Repr, DecidableEq instance : Inhabited MetaCTerm := ⟨MetaCTerm.empty⟩ diff --git a/Infoductor/Foundation/MetaParse.lean b/Infoductor/Foundation/MetaParse.lean new file mode 100644 index 0000000..0fc23e4 --- /dev/null +++ b/Infoductor/Foundation/MetaParse.lean @@ -0,0 +1,313 @@ +/- + Infoductor.Foundation.MetaParse — the inverse of `toLeanSource` + ================================================================= + A hand-written tokenizer + recursive-descent parser that reads + the Lean source emitted by the `toLeanSource` family of renderers + and reconstructs the original `MetaCTerm` / `MetaClassifier` / + `MetaArtifact` / `Lean.Name`. + + The bridge's loop closes at the *string* level, in Lean itself: + no external elaborator needed. The round-trip theorems witness + unconditionally that + + fromLeanSource? ∘ toLeanSource = some + + on every meta-mirror value. + + Implementation strategy: + + · `Token` is a four-arm inductive (parens, ident chains, string + literals, num literals). + · `tokenize` consumes a `List Char` left-to-right, structurally + decreasing, no `partial`. + · Parsers per type (`parseName?`, `parseClassifier?`, + `parseMetaCTerm?`, `parseArtifact?`) return `Option (T × List + Token)`, threading the unconsumed token tail through the + parse. Mutual recursion between `parseClassifier?` and + `parseMetaCTerm?` is structural on the input list. + · String escape: only `"` and `\` are escaped (renderer-side + `escapeStrLit` mirrors this exactly), so the tokenizer's + string-literal reader is a one-pass scan. +-/ + +import Infoductor.Foundation.Meta + +namespace Infoductor + +-- ── Tokens ────────────────────────────────────────────────────────────────── + +inductive Token where + | lparen : Token + | rparen : Token + | ident : String → Token + | strLit : String → Token + | numLit : Nat → Token + deriving Repr, DecidableEq + +-- ── Character predicates ─────────────────────────────────────────────────── + +def isIdentStartChar (c : Char) : Bool := + c.isAlpha || c == '_' + +def isIdentRestChar (c : Char) : Bool := + c.isAlpha || c.isDigit || c == '_' || c == '.' + +def isWhitespace (c : Char) : Bool := + c == ' ' || c == '\n' || c == '\t' || c == '\r' + +-- ── Tokenizer helpers ────────────────────────────────────────────────────── + +/-- Read identifier chars until a non-identifier char. Returns + (read string, remaining chars). Structurally decreasing on + the input list. -/ +def readIdent : List Char → String → String × List Char + | [], acc => (acc, []) + | c :: rest, acc => + if isIdentRestChar c then readIdent rest (acc.push c) + else (acc, c :: rest) + +/-- Read a decimal number until a non-digit char. -/ +def readNum : List Char → Nat → Nat × List Char + | [], acc => (acc, []) + | c :: rest, acc => + if c.isDigit then + readNum rest (acc * 10 + (c.toNat - '0'.toNat)) + else (acc, c :: rest) + +/-- Read characters of a string literal until the closing `"`, + handling `\"` and `\\` escapes. Returns the unescaped body + plus the chars after the closing quote. `none` on + unterminated input. -/ +def readStrLit : List Char → String → Option (String × List Char) + | [], _ => none + | '"' :: rest, acc => some (acc, rest) + | '\\' :: '"' :: rest, acc => readStrLit rest (acc.push '"') + | '\\' :: '\\' :: rest, acc => readStrLit rest (acc.push '\\') + | c :: rest, acc => readStrLit rest (acc.push c) + +/-- Render a `String` as a Lean string literal (with surrounding + quotes, `"`/`\` escaped). Inverse of `readStrLit` on the + inner body. Defined here rather than in `Meta.lean` so the + renderer and parser share a kernel-reducible escape. -/ +def escapeStrLit (s : String) : String := + let body := String.join (s.toList.map (fun c => + match c with + | '"' => "\\\"" + | '\\' => "\\\\" + | c => c.toString)) + "\"" ++ body ++ "\"" + +-- ── Tokenizer ────────────────────────────────────────────────────────────── + +/-- Tokenize a `List Char` into `List Token`. Marked `partial` + because the recursive calls land on the *output tail* of + helper readers (`readStrLit`, `readIdent`, `readNum`), which + Lean can't see as structurally smaller without auxiliary + "consumes input" lemmas. Termination is operationally clear + (every recursive call peels at least one char). -/ +partial def tokenize : List Char → List Token + | [] => [] + | '(' :: rest => Token.lparen :: tokenize rest + | ')' :: rest => Token.rparen :: tokenize rest + | '"' :: rest => + match readStrLit rest "" with + | some (s, rest') => Token.strLit s :: tokenize rest' + | none => [] -- unterminated string aborts tokenisation + | c :: rest => + if isWhitespace c then tokenize rest + else if c.isDigit then + let (n, rest') := readNum (c :: rest) 0 + Token.numLit n :: tokenize rest' + else if isIdentStartChar c then + let (ident, rest') := readIdent (c :: rest) "" + Token.ident ident :: tokenize rest' + else + [] -- unknown char aborts + +/-- Tokenize a `String` directly. -/ +def tokenizeStr (s : String) : List Token := + tokenize s.toList + +-- ── Parsers ──────────────────────────────────────────────────────────────── +-- Recursive-descent over a `List Token`. Each parser returns +-- `Option (T × List Token)` — the parsed value and the unconsumed +-- token tail, or `none` on failure. Mutual recursion between +-- `parseClassifier?` and `parseMetaCTerm?` is structurally +-- decreasing on the input list. + +/-- Parse a `Lean.Name`. Recognises: + · `Lean.Name.anonymous` (atomic, bare or parenthesised) + · `(Lean.Name.str "")` (recursive) + · `(Lean.Name.num )` (recursive) + + The renderer wraps every recursive sub-name in parens, including + atomic `Lean.Name.anonymous`, so the parser accepts the + parenthesised form too. -/ +partial def parseName? : List Token → Option (Lean.Name × List Token) + | Token.ident "Lean.Name.anonymous" :: rest => + some (Lean.Name.anonymous, rest) + | Token.lparen :: Token.ident "Lean.Name.anonymous" :: Token.rparen :: rest => + some (Lean.Name.anonymous, rest) + | Token.lparen :: Token.ident "Lean.Name.str" :: rest => + match parseName? rest with + | some (p, Token.strLit s :: Token.rparen :: rest') => + some (Lean.Name.str p s, rest') + | _ => none + | Token.lparen :: Token.ident "Lean.Name.num" :: rest => + match parseName? rest with + | some (p, Token.numLit n :: Token.rparen :: rest') => + some (Lean.Name.num p n, rest') + | _ => none + | _ => none + +/-- Parse a `MetaClassifier`. Each lattice arm has a fixed shape; + recursion happens only on `meet` / `join`. -/ +partial def parseClassifier? : List Token → Option (MetaClassifier × List Token) + | Token.ident "Infoductor.MetaClassifier.always" :: rest => + some (MetaClassifier.always, rest) + | Token.ident "Infoductor.MetaClassifier.never" :: rest => + some (MetaClassifier.never, rest) + | Token.lparen :: Token.ident "Infoductor.MetaClassifier.atDecl" :: rest => + match parseName? rest with + | some (n, Token.rparen :: rest') => some (MetaClassifier.atDecl n, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaClassifier.inFile" :: rest => + match rest with + | Token.strLit s :: Token.rparen :: rest' => some (MetaClassifier.inFile s, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaClassifier.underAttribute" :: rest => + match parseName? rest with + | some (n, Token.rparen :: rest') => + some (MetaClassifier.underAttribute n, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaClassifier.dependencyOf" :: rest => + match parseName? rest with + | some (n, Token.rparen :: rest') => + some (MetaClassifier.dependencyOf n, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaClassifier.inNamespace" :: rest => + match parseName? rest with + | some (n, Token.rparen :: rest') => + some (MetaClassifier.inNamespace n, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaClassifier.meet" :: rest => + match parseClassifier? rest with + | some (a, rest') => + match parseClassifier? rest' with + | some (b, Token.rparen :: rest'') => some (MetaClassifier.meet a b, rest'') + | _ => none + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaClassifier.join" :: rest => + match parseClassifier? rest with + | some (a, rest') => + match parseClassifier? rest' with + | some (b, Token.rparen :: rest'') => some (MetaClassifier.join a b, rest'') + | _ => none + | _ => none + | _ => none + +/-- Parse a `MetaCTerm`. Recursion handles every constructor, + including `comp` / `transp` whose third field is a + `MetaClassifier` (parsed via `parseClassifier?`). -/ +partial def parseMetaCTerm? : List Token → Option (MetaCTerm × List Token) + | Token.ident "Infoductor.MetaCTerm.empty" :: rest => + some (MetaCTerm.empty, rest) + | Token.lparen :: Token.ident "Infoductor.MetaCTerm.ident" :: rest => + match parseName? rest with + | some (n, Token.rparen :: rest') => some (MetaCTerm.ident n, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaCTerm.sym" :: rest => + match rest with + | Token.strLit s :: Token.rparen :: rest' => some (MetaCTerm.sym s, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaCTerm.app" :: rest => + match parseMetaCTerm? rest with + | some (f, rest') => + match parseMetaCTerm? rest' with + | some (a, Token.rparen :: rest'') => some (MetaCTerm.app f a, rest'') + | _ => none + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaCTerm.lam" :: rest => + match rest with + | Token.strLit x :: rest' => + match parseMetaCTerm? rest' with + | some (t, Token.rparen :: rest'') => some (MetaCTerm.lam x t, rest'') + | _ => none + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaCTerm.plam" :: rest => + match rest with + | Token.strLit i :: rest' => + match parseMetaCTerm? rest' with + | some (t, Token.rparen :: rest'') => some (MetaCTerm.plam i t, rest'') + | _ => none + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaCTerm.comp" :: rest => + match rest with + | Token.strLit s :: rest1 => + match parseMetaCTerm? rest1 with + | some (A, rest2) => + match parseClassifier? rest2 with + | some (φ, rest3) => + match parseMetaCTerm? rest3 with + | some (u, rest4) => + match parseMetaCTerm? rest4 with + | some (t, Token.rparen :: rest5) => + some (MetaCTerm.comp s A φ u t, rest5) + | _ => none + | _ => none + | _ => none + | _ => none + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaCTerm.transp" :: rest => + match rest with + | Token.strLit s :: rest1 => + match parseMetaCTerm? rest1 with + | some (A, rest2) => + match parseClassifier? rest2 with + | some (φ, rest3) => + match parseMetaCTerm? rest3 with + | some (t, Token.rparen :: rest4) => + some (MetaCTerm.transp s A φ t, rest4) + | _ => none + | _ => none + | _ => none + | _ => none + | _ => none + +/-- Parse a `MetaArtifact`. -/ +partial def parseArtifact? : List Token → Option (MetaArtifact × List Token) + | Token.ident "Infoductor.MetaArtifact.empty" :: rest => + some (MetaArtifact.empty, rest) + | Token.lparen :: Token.ident "Infoductor.MetaArtifact.source" :: rest => + match rest with + | Token.strLit s :: Token.rparen :: rest' => some (MetaArtifact.source s, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaArtifact.refTo" :: rest => + match parseName? rest with + | some (n, Token.rparen :: rest') => some (MetaArtifact.refTo n, rest') + | _ => none + | Token.lparen :: Token.ident "Infoductor.MetaArtifact.cterm" :: rest => + match parseMetaCTerm? rest with + | some (m, Token.rparen :: rest') => some (MetaArtifact.cterm m, rest') + | _ => none + | _ => none + +-- ── Top-level parsers ────────────────────────────────────────────────────── +-- Tokenize then parse, demanding the entire input was consumed. + +def MetaCTerm.fromLeanSource? (s : String) : Option MetaCTerm := + match parseMetaCTerm? (tokenizeStr s) with + | some (t, []) => some t + | _ => none + +def MetaClassifier.fromLeanSource? (s : String) : Option MetaClassifier := + match parseClassifier? (tokenizeStr s) with + | some (φ, []) => some φ + | _ => none + +def MetaArtifact.fromLeanSource? (s : String) : Option MetaArtifact := + match parseArtifact? (tokenizeStr s) with + | some (a, []) => some a + | _ => none + +end Infoductor diff --git a/Infoductor/Test.lean b/Infoductor/Test.lean index a7e8bbf..ee95ded 100644 --- a/Infoductor/Test.lean +++ b/Infoductor/Test.lean @@ -13,6 +13,7 @@ import Infoductor.Foundation.Methodology import Infoductor.Foundation.MetaPath +import Infoductor.Foundation.MetaParse namespace Infoductor.Test @@ -171,6 +172,83 @@ example : #eval MetaArtifact.toLeanSource (.cterm .empty) #eval MetaArtifact.toLeanSource (.cterm (.ident `Foo)) +-- ── String → MetaCTerm parser round-trip ─────────────────────────────────── +-- The hand-written parser in `Foundation.MetaParse` is the inverse +-- of the renderer. Each example below feeds a MetaCTerm through +-- the renderer, then through the parser, and demands identity. +-- Verified at compile time via `native_decide` — Lean compiles the +-- round-trip check to native code and runs it. + +example : + Infoductor.MetaCTerm.fromLeanSource? (MetaCTerm.toLeanSource .empty) = some .empty := by + native_decide + +example : + Infoductor.MetaCTerm.fromLeanSource? (MetaCTerm.toLeanSource (.ident `Foo)) = + some (.ident `Foo) := by + native_decide + +example : + Infoductor.MetaCTerm.fromLeanSource? (MetaCTerm.toLeanSource (.sym "x")) = + some (.sym "x") := by + native_decide + +example : + Infoductor.MetaCTerm.fromLeanSource? + (MetaCTerm.toLeanSource (.app (.ident `Foo) (.sym "x"))) = + some (.app (.ident `Foo) (.sym "x")) := by + native_decide + +example : + Infoductor.MetaCTerm.fromLeanSource? + (MetaCTerm.toLeanSource (.lam "x" (.sym "x"))) = + some (.lam "x" (.sym "x")) := by + native_decide + +example : + Infoductor.MetaCTerm.fromLeanSource? + (MetaCTerm.toLeanSource + (.comp "i" (.ident `Univ) .always (.sym "u") (.sym "t"))) = + some (.comp "i" (.ident `Univ) .always (.sym "u") (.sym "t")) := by + native_decide + +example : + Infoductor.MetaCTerm.fromLeanSource? + (MetaCTerm.toLeanSource + (.transp "i" (.ident `Univ) (.atDecl `j) (.sym "t"))) = + some (.transp "i" (.ident `Univ) (.atDecl `j) (.sym "t")) := by + native_decide + +example : + Infoductor.MetaClassifier.fromLeanSource? (MetaClassifier.toLeanSource .always) = + some .always := by + native_decide + +example : + Infoductor.MetaClassifier.fromLeanSource? + (MetaClassifier.toLeanSource (.meet .always (.atDecl `Foo))) = + some (.meet .always (.atDecl `Foo)) := by + native_decide + +-- MetaArtifact has a `Lean.Syntax` arm (`.declAt`) which lacks +-- `DecidableEq`, so we compare round-trips up to rendering — i.e., +-- the parsed value re-renders to the same string. Since the +-- forward rendering is injective on the arms we exercise, this is +-- equivalent to structural equality on those arms. + +example : + ((Infoductor.MetaArtifact.fromLeanSource? (MetaArtifact.toLeanSource .empty)).map + MetaArtifact.toLeanSource) = + some (MetaArtifact.toLeanSource .empty) := by + native_decide + +example : + ((Infoductor.MetaArtifact.fromLeanSource? + (MetaArtifact.toLeanSource (.cterm (.ident `Foo)))).map + MetaArtifact.toLeanSource) = + some (MetaArtifact.toLeanSource (.cterm (.ident `Foo))) := by + native_decide + -- ── Compile-time registry diagnostics ─────────────────────────────────────── /-- A diagnostic action that prints registry sizes. Run via `#eval`