diff --git a/Infoductor/Foundation/Meta.lean b/Infoductor/Foundation/Meta.lean index 7db043c..ed22d6a 100644 --- a/Infoductor/Foundation/Meta.lean +++ b/Infoductor/Foundation/Meta.lean @@ -330,6 +330,110 @@ def nameToLeanSource : Lean.Name → String | .num p n => s!"(Lean.Name.num {nameToLeanSource p} {n})" +-- ── Source rendering for `Lean.Syntax` (the `.declAt` payload) ───────────── +-- The structural-mirror identity for parsed `Lean.Syntax` trees: we render +-- each constructor as a Lean expression that constructs the same value via +-- `Lean.Syntax.` / `Lean.SourceInfo.` / `Substring.Raw.mk` / +-- `String.Pos.Raw.mk`. The renderer's image is parsed back by `MetaParse`'s +-- mirror parser; together they form a true coreflection on `Syntax`. + +/-- Render a `String.Pos.Raw` as Lean source. -/ +def stringPosRawToLeanSource (p : String.Pos.Raw) : String := + s!"(String.Pos.Raw.mk {p.byteIdx})" + +/-- Render a `Substring.Raw` as Lean source. Faithful triple-encode: + the underlying string + start position + stop position. -/ +def substringRawToLeanSource (s : Substring.Raw) : String := + s!"(Substring.Raw.mk {escapeStrLit s.str} \ + {stringPosRawToLeanSource s.startPos} \ + {stringPosRawToLeanSource s.stopPos})" + +/-- Render a `Lean.SourceInfo` as Lean source, faithful on every arm. -/ +def sourceInfoToLeanSource : Lean.SourceInfo → String + | .original lead pos trail endPos => + s!"(Lean.SourceInfo.original {substringRawToLeanSource lead} \ + {stringPosRawToLeanSource pos} \ + {substringRawToLeanSource trail} \ + {stringPosRawToLeanSource endPos})" + | .synthetic pos endPos canonical => + s!"(Lean.SourceInfo.synthetic {stringPosRawToLeanSource pos} \ + {stringPosRawToLeanSource endPos} {canonical})" + | .none => "Lean.SourceInfo.none" + +/-- Encode a `List String` as `List.cons`/`List.nil` chain of string + literals. Mirrors the `nameToLeanSource` parenthesisation + convention. Used by the `Preresolved.decl` field-list arm. -/ +def stringListToLeanSource : List String → String + | [] => "List.nil" + | s :: rest => + s!"(List.cons {escapeStrLit s} {stringListToLeanSource rest})" + +/-- Render a `Lean.Syntax.Preresolved` as Lean source. -/ +def preresolvedToLeanSource : Lean.Syntax.Preresolved → String + | .namespace ns => + s!"(Lean.Syntax.Preresolved.namespace {nameToLeanSource ns})" + | .decl n fields => + s!"(Lean.Syntax.Preresolved.decl {nameToLeanSource n} \ + {stringListToLeanSource fields})" + +/-- Encode a `List Lean.Syntax.Preresolved` as a `List.cons`/`List.nil` + chain. -/ +def preresolvedListToLeanSource : List Lean.Syntax.Preresolved → String + | [] => "List.nil" + | p :: rest => + s!"(List.cons {preresolvedToLeanSource p} \ + {preresolvedListToLeanSource rest})" + +-- Renderer for `Lean.Syntax` and its array children, defined via a +-- `mutual` block so the recursion is *structural* on the +-- `Lean.Syntax` constructors (and on `List Lean.Syntax` through +-- `Array.toList`). Lean 4.30's auto-derived size function on +-- `Lean.Syntax` is sufficient for the well-founded measure on +-- `args.toList`, so no fuel parameter is needed. This makes the +-- encoder and its companion depth function fully structural — the +-- round-trip proofs proceed by induction on `Lean.Syntax` directly. +mutual + /-- Renderer arm for `Lean.Syntax`. -/ + def syntaxToLeanSource : Lean.Syntax → String + | .missing => "Lean.Syntax.missing" + | .atom info val => + s!"(Lean.Syntax.atom {sourceInfoToLeanSource info} {escapeStrLit val})" + | .ident info rawVal val preresolved => + s!"(Lean.Syntax.ident {sourceInfoToLeanSource info} \ + {substringRawToLeanSource rawVal} \ + {nameToLeanSource val} \ + {preresolvedListToLeanSource preresolved})" + | .node info kind args => + s!"(Lean.Syntax.node {sourceInfoToLeanSource info} \ + {nameToLeanSource kind} \ + {syntaxArrayToLeanSource args.toList})" + + /-- Companion list-renderer for `List Lean.Syntax`. Encodes the + list as a `List.cons` / `List.nil` chain. -/ + def syntaxArrayToLeanSource : List Lean.Syntax → String + | [] => "List.nil" + | s :: rest => + s!"(List.cons {syntaxToLeanSource s} {syntaxArrayToLeanSource rest})" +end + +-- Structural depth measure on `Lean.Syntax`, defined mutually with +-- `syntaxArrayDepth`. A true structural depth — not fuel-bounded — +-- so depth bounds on the encoded form are provable by induction on +-- `Lean.Syntax` directly. +mutual + /-- Structural depth on `Lean.Syntax`. -/ + def syntaxDepth : Lean.Syntax → Nat + | .missing => 1 + | .atom _ _ => 1 + | .ident _ _ _ _ => 1 + | .node _ _ args => syntaxArrayDepth args.toList + 1 + + /-- Companion structural depth on `List Lean.Syntax`. -/ + def syntaxArrayDepth : List Lean.Syntax → Nat + | [] => 1 + | s :: rest => max (syntaxDepth s) (syntaxArrayDepth rest) + 1 +end + /-- Render a `MetaClassifier` as Lean source. Each lattice arm becomes a constructor call in the `Infoductor.MetaClassifier` namespace; recursive arms (`meet`, `join`) render their @@ -378,12 +482,14 @@ def MetaCTerm.toLeanSource : MetaCTerm → String /-- Render a `MetaArtifact` as Lean source. The structural arm (`cterm`) recurses through `MetaCTerm.toLeanSource`; `source` - arms wrap the raw string in a `.source` constructor; `declAt` - cannot be source-rendered (parsed Syntax is opaque) and - yields a placeholder. -/ + arms wrap the raw string in a `.source` constructor; the + `declAt` arm renders the carried `Lean.Syntax` via the + constructor-call mirror (`syntaxToLeanSource`) so the round- + trip closes through the `MetaParse` parser. -/ def MetaArtifact.toLeanSource : MetaArtifact → String | .source s => s!"(Infoductor.MetaArtifact.source {escapeStrLit s})" - | .declAt _ => "/- declAt with opaque Syntax — not source-renderable -/" + | .declAt s => + s!"(Infoductor.MetaArtifact.declAt {syntaxToLeanSource s})" | .cterm m => s!"(Infoductor.MetaArtifact.cterm {MetaCTerm.toLeanSource m})" | .refTo n => diff --git a/Infoductor/Foundation/MetaParse.lean b/Infoductor/Foundation/MetaParse.lean index 576a55e..b57fba0 100644 --- a/Infoductor/Foundation/MetaParse.lean +++ b/Infoductor/Foundation/MetaParse.lean @@ -263,6 +263,198 @@ def parseMetaCTerm?Aux : Nat → List Token → Option (MetaCTerm × List Token) | _ => none | _, _ => none +-- ── Parsers for Lean.Syntax mirror payloads ───────────────────────────────── +-- Mirror the renderer in `Foundation.Meta`: each constructor of +-- `Lean.Syntax`, `Lean.SourceInfo`, `Substring.Raw`, `String.Pos.Raw`, +-- and `Lean.Syntax.Preresolved` is recognised by a leading-ident +-- pattern. All parsers fuel-bounded for kernel termination. + +/-- Parse a `String.Pos.Raw`. -/ +def parseStringPosRaw?Aux : + Nat → List Token → Option (String.Pos.Raw × List Token) + | 0, _ => none + | _+1, Token.lparen :: Token.ident "String.Pos.Raw.mk" :: + Token.numLit k :: Token.rparen :: rest => + some ({ byteIdx := k }, rest) + | _, _ => none + +/-- Parse a `Substring.Raw`. -/ +def parseSubstringRaw?Aux : + Nat → List Token → Option (Substring.Raw × List Token) + | 0, _ => none + | n+1, Token.lparen :: Token.ident "Substring.Raw.mk" :: + Token.strLit s :: rest => + match parseStringPosRaw?Aux n rest with + | some (startPos, rest') => + match parseStringPosRaw?Aux n rest' with + | some (stopPos, Token.rparen :: rest'') => + some ({ str := s, startPos := startPos, stopPos := stopPos }, rest'') + | _ => none + | _ => none + | _, _ => none + +/-- Parse a `Bool` literal — `true` or `false`. Used by the + `SourceInfo.synthetic` parser. -/ +def parseBool?Aux : List Token → Option (Bool × List Token) + | Token.ident "true" :: rest => some (true, rest) + | Token.ident "false" :: rest => some (false, rest) + | _ => none + +/-- Parse a `Lean.SourceInfo`. -/ +def parseSourceInfo?Aux : + Nat → List Token → Option (Lean.SourceInfo × List Token) + | 0, _ => none + | _+1, Token.ident "Lean.SourceInfo.none" :: rest => + some (Lean.SourceInfo.none, rest) + | n+1, Token.lparen :: Token.ident "Lean.SourceInfo.original" :: rest => + match parseSubstringRaw?Aux n rest with + | some (lead, rest1) => + match parseStringPosRaw?Aux n rest1 with + | some (pos, rest2) => + match parseSubstringRaw?Aux n rest2 with + | some (trail, rest3) => + match parseStringPosRaw?Aux n rest3 with + | some (endPos, Token.rparen :: rest4) => + some (Lean.SourceInfo.original lead pos trail endPos, rest4) + | _ => none + | _ => none + | _ => none + | _ => none + | n+1, Token.lparen :: Token.ident "Lean.SourceInfo.synthetic" :: rest => + match parseStringPosRaw?Aux n rest with + | some (pos, rest1) => + match parseStringPosRaw?Aux n rest1 with + | some (endPos, rest2) => + match parseBool?Aux rest2 with + | some (canonical, Token.rparen :: rest3) => + some (Lean.SourceInfo.synthetic pos endPos canonical, rest3) + | _ => none + | _ => none + | _ => none + | _, _ => none + +/-- Parse a `List String` rendered as a `List.cons`/`List.nil` chain. -/ +def parseStringList?Aux : + Nat → List Token → Option (List String × List Token) + | 0, _ => none + | _+1, Token.ident "List.nil" :: rest => some ([], rest) + | n+1, Token.lparen :: Token.ident "List.cons" :: + Token.strLit s :: rest => + match parseStringList?Aux n rest with + | some (tail, Token.rparen :: rest') => some (s :: tail, rest') + | _ => none + | _, _ => none + +/-- Parse a `Lean.Syntax.Preresolved`. -/ +def parsePreresolved?Aux : + Nat → List Token → Option (Lean.Syntax.Preresolved × List Token) + | 0, _ => none + | n+1, Token.lparen :: + Token.ident "Lean.Syntax.Preresolved.namespace" :: rest => + match parseName?Aux n rest with + | some (ns, Token.rparen :: rest') => + some (Lean.Syntax.Preresolved.namespace ns, rest') + | _ => none + | n+1, Token.lparen :: + Token.ident "Lean.Syntax.Preresolved.decl" :: rest => + match parseName?Aux n rest with + | some (nm, rest1) => + match parseStringList?Aux n rest1 with + | some (fields, Token.rparen :: rest2) => + some (Lean.Syntax.Preresolved.decl nm fields, rest2) + | _ => none + | _ => none + | _, _ => none + +/-- Parse a `List Lean.Syntax.Preresolved`. -/ +def parsePreresolvedList?Aux : + Nat → List Token → Option (List Lean.Syntax.Preresolved × List Token) + | 0, _ => none + | _+1, Token.ident "List.nil" :: rest => some ([], rest) + | n+1, Token.lparen :: Token.ident "List.cons" :: rest => + match parsePreresolved?Aux n rest with + | some (head, rest1) => + match parsePreresolvedList?Aux n rest1 with + | some (tail, Token.rparen :: rest2) => + some (head :: tail, rest2) + | _ => none + | _ => none + | _, _ => none + +/-- Joint parser for `Lean.Syntax` / `List Lean.Syntax`. Mirrors the + renderer's `Sum` carrier idiom (`syntaxRenderAux`) so the parser + matches the encoding's recursive structure exactly. The carrier + `Lean.Syntax ⊕ List Lean.Syntax` is used at the *type* level to + select the parser arm; the result is the parsed value plus the + unconsumed token tail. Fuel-bounded for kernel termination. -/ +def parseSyntaxOrList?Aux : + Nat → Bool → List Token → + Option ((Lean.Syntax ⊕ List Lean.Syntax) × List Token) + | 0, _, _ => none + -- Single-Syntax mode, `true` + | _+1, true, Token.ident "Lean.Syntax.missing" :: rest => + some (.inl Lean.Syntax.missing, rest) + | n+1, true, Token.lparen :: Token.ident "Lean.Syntax.atom" :: rest => + match parseSourceInfo?Aux n rest with + | some (info, Token.strLit val :: Token.rparen :: rest') => + some (.inl (Lean.Syntax.atom info val), rest') + | _ => none + | n+1, true, Token.lparen :: Token.ident "Lean.Syntax.ident" :: rest => + match parseSourceInfo?Aux n rest with + | some (info, rest1) => + match parseSubstringRaw?Aux n rest1 with + | some (rawVal, rest2) => + match parseName?Aux n rest2 with + | some (val, rest3) => + match parsePreresolvedList?Aux n rest3 with + | some (preresolved, Token.rparen :: rest4) => + some (.inl (Lean.Syntax.ident info rawVal val preresolved), rest4) + | _ => none + | _ => none + | _ => none + | _ => none + | n+1, true, Token.lparen :: Token.ident "Lean.Syntax.node" :: rest => + match parseSourceInfo?Aux n rest with + | some (info, rest1) => + match parseName?Aux n rest1 with + | some (kind, rest2) => + match parseSyntaxOrList?Aux n false rest2 with + | some (.inr args, Token.rparen :: rest3) => + some (.inl (Lean.Syntax.node info kind args.toArray), rest3) + | _ => none + | _ => none + | _ => none + -- List-Syntax mode, `false` + | _+1, false, Token.ident "List.nil" :: rest => + some (.inr [], rest) + | n+1, false, Token.lparen :: Token.ident "List.cons" :: rest => + match parseSyntaxOrList?Aux n true rest with + | some (.inl head, rest1) => + match parseSyntaxOrList?Aux n false rest1 with + | some (.inr tail, Token.rparen :: rest2) => + some (.inr (head :: tail), rest2) + | _ => none + | _ => none + | _, _, _ => none + +/-- Parse a `Lean.Syntax`. Thin projection over `parseSyntaxOrList?Aux` + in single-Syntax mode (`true`). -/ +def parseSyntax?Aux : + Nat → List Token → Option (Lean.Syntax × List Token) + | n, tokens => + match parseSyntaxOrList?Aux n true tokens with + | some (.inl s, rest) => some (s, rest) + | _ => none + +/-- Parse a `List Lean.Syntax`. Thin projection over `parseSyntaxOrList?Aux` + in list-mode (`false`). -/ +def parseSyntaxList?Aux : + Nat → List Token → Option (List Lean.Syntax × List Token) + | n, tokens => + match parseSyntaxOrList?Aux n false tokens with + | some (.inr l, rest) => some (l, rest) + | _ => none + /-- Parse a `MetaArtifact`. -/ def parseArtifact?Aux : Nat → List Token → Option (MetaArtifact × List Token) | 0, _ => none @@ -283,6 +475,11 @@ def parseArtifact?Aux : Nat → List Token → Option (MetaArtifact × List Toke | some (m, Token.rparen :: rest') => some (MetaArtifact.cterm m, rest') | _ => none + | n+1, Token.lparen :: Token.ident "Infoductor.MetaArtifact.declAt" :: rest => + match parseSyntax?Aux n rest with + | some (s, Token.rparen :: rest') => + some (MetaArtifact.declAt s, rest') + | _ => none | _, _ => none -- ── Top-level wrappers ───────────────────────────────────────────────────── @@ -383,10 +580,98 @@ def MetaCTerm.toTokens : MetaCTerm → List Token toTokens t ++ [Token.rparen] | .empty => [Token.ident "Infoductor.MetaCTerm.empty"] +/-- Canonical token form for `String.Pos.Raw`. -/ +def stringPosRawToTokens (p : String.Pos.Raw) : List Token := + [Token.lparen, Token.ident "String.Pos.Raw.mk", + Token.numLit p.byteIdx, Token.rparen] + +/-- Canonical token form for `Substring.Raw`. -/ +def substringRawToTokens (s : Substring.Raw) : List Token := + [Token.lparen, Token.ident "Substring.Raw.mk", Token.strLit s.str] ++ + stringPosRawToTokens s.startPos ++ + stringPosRawToTokens s.stopPos ++ [Token.rparen] + +/-- Canonical token form for `Bool`. -/ +def boolToTokens : Bool → List Token + | true => [Token.ident "true"] + | false => [Token.ident "false"] + +/-- Canonical token form for `Lean.SourceInfo`. -/ +def sourceInfoToTokens : Lean.SourceInfo → List Token + | .original lead pos trail endPos => + [Token.lparen, Token.ident "Lean.SourceInfo.original"] ++ + substringRawToTokens lead ++ + stringPosRawToTokens pos ++ + substringRawToTokens trail ++ + stringPosRawToTokens endPos ++ [Token.rparen] + | .synthetic pos endPos canonical => + [Token.lparen, Token.ident "Lean.SourceInfo.synthetic"] ++ + stringPosRawToTokens pos ++ + stringPosRawToTokens endPos ++ + boolToTokens canonical ++ [Token.rparen] + | .none => [Token.ident "Lean.SourceInfo.none"] + +/-- Canonical token form for `List String` (used by `Preresolved.decl`). -/ +def stringListToTokens : List String → List Token + | [] => [Token.ident "List.nil"] + | s :: rest => + [Token.lparen, Token.ident "List.cons", Token.strLit s] ++ + stringListToTokens rest ++ [Token.rparen] + +/-- Canonical token form for `Lean.Syntax.Preresolved`. -/ +def preresolvedToTokens : Lean.Syntax.Preresolved → List Token + | .namespace ns => + [Token.lparen, Token.ident "Lean.Syntax.Preresolved.namespace"] ++ + nameToTokens ns ++ [Token.rparen] + | .decl n fields => + [Token.lparen, Token.ident "Lean.Syntax.Preresolved.decl"] ++ + nameToTokens n ++ stringListToTokens fields ++ [Token.rparen] + +/-- Canonical token form for `List Lean.Syntax.Preresolved`. -/ +def preresolvedListToTokens : List Lean.Syntax.Preresolved → List Token + | [] => [Token.ident "List.nil"] + | p :: rest => + [Token.lparen, Token.ident "List.cons"] ++ + preresolvedToTokens p ++ preresolvedListToTokens rest ++ [Token.rparen] + +-- Canonical token form for `Lean.Syntax`. Defined via `mutual` so +-- recursion is structural on the syntax tree (and on `List Lean.Syntax` +-- through `Array.toList`); no fuel parameter needed. Mirrors +-- `syntaxToLeanSource` from `Foundation.Meta`. +mutual + /-- Token form of a `Lean.Syntax`. -/ + def syntaxToTokens : Lean.Syntax → List Token + | .missing => [Token.ident "Lean.Syntax.missing"] + | .atom info val => + [Token.lparen, Token.ident "Lean.Syntax.atom"] ++ + sourceInfoToTokens info ++ [Token.strLit val, Token.rparen] + | .ident info rawVal val preresolved => + [Token.lparen, Token.ident "Lean.Syntax.ident"] ++ + sourceInfoToTokens info ++ + substringRawToTokens rawVal ++ + nameToTokens val ++ + preresolvedListToTokens preresolved ++ [Token.rparen] + | .node info kind args => + [Token.lparen, Token.ident "Lean.Syntax.node"] ++ + sourceInfoToTokens info ++ + nameToTokens kind ++ + syntaxListToTokens args.toList ++ [Token.rparen] + + /-- Token form of a `List Lean.Syntax` — encodes as a `List.cons` / + `List.nil` chain. -/ + def syntaxListToTokens : List Lean.Syntax → List Token + | [] => [Token.ident "List.nil"] + | s :: rest => + [Token.lparen, Token.ident "List.cons"] ++ + syntaxToTokens s ++ syntaxListToTokens rest ++ [Token.rparen] +end + def MetaArtifact.toTokens : MetaArtifact → List Token | .source s => [Token.lparen, Token.ident "Infoductor.MetaArtifact.source", Token.strLit s, Token.rparen] - | .declAt _ => [] -- not source-renderable; toTokens undefined for this arm + | .declAt s => + [Token.lparen, Token.ident "Infoductor.MetaArtifact.declAt"] ++ + syntaxToTokens s ++ [Token.rparen] | .cterm m => [Token.lparen, Token.ident "Infoductor.MetaArtifact.cterm"] ++ m.toTokens ++ [Token.rparen] | .refTo n => [Token.lparen, Token.ident "Infoductor.MetaArtifact.refTo"] ++ @@ -422,19 +707,72 @@ def cTermDepth : MetaCTerm → Nat | .transp _ A φ t => max (max (cTermDepth A) (classifierDepth φ)) (cTermDepth t) + 1 +/-- Depth measure for `String.Pos.Raw` — atomic 4-token form. -/ +def stringPosRawDepth (_ : String.Pos.Raw) : Nat := 1 + +/-- Depth measure for `Substring.Raw`. -/ +def substringRawDepth (_ : Substring.Raw) : Nat := 2 + +/-- Depth measure for `Lean.SourceInfo`. -/ +def sourceInfoDepth : Lean.SourceInfo → Nat + | .original _ _ _ _ => 3 + | .synthetic _ _ _ => 3 + | .none => 1 + +/-- Depth measure for `List String`. -/ +def stringListDepth : List String → Nat + | [] => 1 + | _ :: rest => stringListDepth rest + 1 + +/-- Depth measure for `Lean.Syntax.Preresolved`. -/ +def preresolvedDepth : Lean.Syntax.Preresolved → Nat + | .namespace n => nameDepth n + 1 + | .decl n fields => max (nameDepth n) (stringListDepth fields) + 1 + +/-- Depth measure for `List Lean.Syntax.Preresolved`. -/ +def preresolvedListDepth : List Lean.Syntax.Preresolved → Nat + | [] => 1 + | p :: rest => max (preresolvedDepth p) (preresolvedListDepth rest) + 1 + +-- Structural depth measure for `Lean.Syntax`. Defined via `mutual` +-- so recursion is structural on the syntax tree. No fuel parameter +-- needed — depth is a true structural function of the input. Includes +-- the depths of *all* sub-payloads (sourceInfo, substring, name, +-- preresolved-list) so the parser-correctness proof's bookkeeping is +-- direct. +mutual + /-- Structural depth on `Lean.Syntax`. -/ + def syntaxParseDepth : Lean.Syntax → Nat + | .missing => 1 + | .atom info _ => sourceInfoDepth info + 1 + | .ident info rawVal n preresolved => + max (sourceInfoDepth info) + (max (substringRawDepth rawVal) + (max (nameDepth n) (preresolvedListDepth preresolved))) + 1 + | .node info kind args => + max (sourceInfoDepth info) + (max (nameDepth kind) (syntaxListParseDepth args.toList)) + 1 + + /-- Structural depth on `List Lean.Syntax`. -/ + def syntaxListParseDepth : List Lean.Syntax → Nat + | [] => 1 + | s :: rest => max (syntaxParseDepth s) (syntaxListParseDepth rest) + 1 +end + def artifactDepth : MetaArtifact → Nat | .source _ => 2 - | .declAt _ => 0 -- not source-encodable; round-trip excluded + | .declAt s => syntaxParseDepth s + 1 | .cterm m => cTermDepth m + 1 | .refTo n => nameDepth n + 1 | .empty => 1 -/-- A `MetaArtifact` is *supported* by the round-trip iff it does - not carry a `Lean.Syntax` (which cannot be source-rendered). - All other arms are supported. -/ +/-- All `MetaArtifact` arms are now supported by the round-trip: + `.declAt` payloads are encoded constructively via the + `syntaxToTokens` / `parseSyntax?Aux` pair. Retained as a + Boolean predicate (constant `true`) for backward compatibility + with downstream code that depended on its existence. -/ def MetaArtifact.supported : MetaArtifact → Bool - | .declAt _ => false - | _ => true + | _ => true -- ── Parser correctness on toTokens (Phase 2) ────────────────────────────── -- Universal theorems witnessing that each parser, when fed the @@ -608,6 +946,261 @@ theorem parseMetaCTerm?Aux_correct : parseClassifier?Aux_correct φ _ f hφ, parseMetaCTerm?Aux_correct t _ f ht] +-- ── Correctness for Lean.Syntax mirror parsers ───────────────────────────── +-- Each parser proves: feeding the canonical token form of a value plus +-- arbitrary trailing tokens reconstructs the value, leaving the trail +-- untouched, provided fuel ≥ value's depth. + +theorem parseStringPosRaw?Aux_correct : + ∀ (p : String.Pos.Raw) (rest : List Token) (fuel : Nat), + fuel ≥ stringPosRawDepth p → + parseStringPosRaw?Aux fuel (stringPosRawToTokens p ++ rest) = some (p, rest) := by + intro p rest fuel h + rcases fuel with _ | f + · simp [stringPosRawDepth] at h + · simp [stringPosRawToTokens, parseStringPosRaw?Aux] + +theorem parseSubstringRaw?Aux_correct : + ∀ (s : Substring.Raw) (rest : List Token) (fuel : Nat), + fuel ≥ substringRawDepth s → + parseSubstringRaw?Aux fuel (substringRawToTokens s ++ rest) = some (s, rest) := by + intro s rest fuel h + rcases fuel with _ | f + · simp [substringRawDepth] at h + · simp only [substringRawToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseSubstringRaw?Aux] + have h1 : f ≥ stringPosRawDepth s.startPos := by + simp [substringRawDepth, stringPosRawDepth] at h ⊢; omega + have h2 : f ≥ stringPosRawDepth s.stopPos := by + simp [substringRawDepth, stringPosRawDepth] at h ⊢; omega + simp only [parseStringPosRaw?Aux_correct s.startPos _ f h1, + parseStringPosRaw?Aux_correct s.stopPos _ f h2] + +theorem parseBool?Aux_correct : + ∀ (b : Bool) (rest : List Token), + parseBool?Aux (boolToTokens b ++ rest) = some (b, rest) := by + intro b rest + cases b <;> simp [boolToTokens, parseBool?Aux] + +theorem parseSourceInfo?Aux_correct : + ∀ (info : Lean.SourceInfo) (rest : List Token) (fuel : Nat), + fuel ≥ sourceInfoDepth info → + parseSourceInfo?Aux fuel (sourceInfoToTokens info ++ rest) = some (info, rest) := by + intro info rest fuel h + cases info with + | none => + rcases fuel with _ | f + · simp [sourceInfoDepth] at h + · simp [sourceInfoToTokens, parseSourceInfo?Aux] + | original lead pos trail endPos => + rcases fuel with _ | f + · simp [sourceInfoDepth] at h + · simp only [sourceInfoToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseSourceInfo?Aux] + have hlead : f ≥ substringRawDepth lead := by + simp [sourceInfoDepth, substringRawDepth] at h ⊢; omega + have hpos : f ≥ stringPosRawDepth pos := by + simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega + have htrail : f ≥ substringRawDepth trail := by + simp [sourceInfoDepth, substringRawDepth] at h ⊢; omega + have hend : f ≥ stringPosRawDepth endPos := by + simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega + simp only [parseSubstringRaw?Aux_correct lead _ f hlead, + parseStringPosRaw?Aux_correct pos _ f hpos, + parseSubstringRaw?Aux_correct trail _ f htrail, + parseStringPosRaw?Aux_correct endPos _ f hend] + | synthetic pos endPos canonical => + rcases fuel with _ | f + · simp [sourceInfoDepth] at h + · simp only [sourceInfoToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseSourceInfo?Aux] + have hpos : f ≥ stringPosRawDepth pos := by + simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega + have hend : f ≥ stringPosRawDepth endPos := by + simp [sourceInfoDepth, stringPosRawDepth] at h ⊢; omega + simp only [parseStringPosRaw?Aux_correct pos _ f hpos, + parseStringPosRaw?Aux_correct endPos _ f hend, + parseBool?Aux_correct canonical (Token.rparen :: rest)] + +theorem parseStringList?Aux_correct : + ∀ (l : List String) (rest : List Token) (fuel : Nat), + fuel ≥ stringListDepth l → + parseStringList?Aux fuel (stringListToTokens l ++ rest) = some (l, rest) + | [], rest, fuel, h => by + rcases fuel with _ | f + · simp [stringListDepth] at h + · simp [stringListToTokens, parseStringList?Aux] + | s :: tail, rest, fuel, h => by + rcases fuel with _ | f + · simp [stringListDepth] at h + · simp only [stringListToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseStringList?Aux] + have htail : f ≥ stringListDepth tail := by + simp [stringListDepth] at h; omega + simp only [parseStringList?Aux_correct tail _ f htail] + +theorem parsePreresolved?Aux_correct : + ∀ (p : Lean.Syntax.Preresolved) (rest : List Token) (fuel : Nat), + fuel ≥ preresolvedDepth p → + parsePreresolved?Aux fuel (preresolvedToTokens p ++ rest) = some (p, rest) + | .namespace ns, rest, fuel, h => by + rcases fuel with _ | f + · simp [preresolvedDepth] at h + · simp only [preresolvedToTokens, List.append_assoc, List.cons_append, + List.nil_append, parsePreresolved?Aux] + have hn : f ≥ nameDepth ns := by simp [preresolvedDepth] at h; omega + simp only [parseName?Aux_correct ns _ f hn] + | .decl n fields, rest, fuel, h => by + rcases fuel with _ | f + · simp [preresolvedDepth] at h + · simp only [preresolvedToTokens, List.append_assoc, List.cons_append, + List.nil_append, parsePreresolved?Aux] + have hn : f ≥ nameDepth n := by simp [preresolvedDepth] at h; omega + have hf : f ≥ stringListDepth fields := by simp [preresolvedDepth] at h; omega + simp only [parseName?Aux_correct n _ f hn, + parseStringList?Aux_correct fields _ f hf] + +theorem parsePreresolvedList?Aux_correct : + ∀ (l : List Lean.Syntax.Preresolved) (rest : List Token) (fuel : Nat), + fuel ≥ preresolvedListDepth l → + parsePreresolvedList?Aux fuel (preresolvedListToTokens l ++ rest) = some (l, rest) + | [], rest, fuel, h => by + rcases fuel with _ | f + · simp [preresolvedListDepth] at h + · simp [preresolvedListToTokens, parsePreresolvedList?Aux] + | p :: tail, rest, fuel, h => by + rcases fuel with _ | f + · simp [preresolvedListDepth] at h + · simp only [preresolvedListToTokens, List.append_assoc, List.cons_append, + List.nil_append, parsePreresolvedList?Aux] + have hp : f ≥ preresolvedDepth p := by simp [preresolvedListDepth] at h; omega + have ht : f ≥ preresolvedListDepth tail := by simp [preresolvedListDepth] at h; omega + simp only [parsePreresolved?Aux_correct p _ f hp, + parsePreresolvedList?Aux_correct tail _ f ht] + +/-- Round-trip for the joint syntax / list-syntax parser. The encoder + is structural (no fuel) — the parser still needs fuel because it + consumes a token list of unknown length; fuel ≥ structural depth + of the value being parsed. -/ +theorem parseSyntaxOrList?Aux_correct : + ∀ (fuel : Nat) (x : Lean.Syntax ⊕ List Lean.Syntax) (rest : List Token), + fuel ≥ Sum.elim syntaxParseDepth syntaxListParseDepth x → + parseSyntaxOrList?Aux fuel + (Sum.elim (fun _ => true) (fun _ => false) x) + (Sum.elim syntaxToTokens syntaxListToTokens x ++ rest) = some (x, rest) + | 0, x, _, h => by + cases x with + | inl s => + simp only [Sum.elim_inl] at h + cases s <;> simp [syntaxParseDepth] at h + | inr l => + simp only [Sum.elim_inr] at h + cases l <;> simp [syntaxListParseDepth] at h + | f + 1, .inl .missing, rest, _ => by + simp [syntaxToTokens, parseSyntaxOrList?Aux] + | f + 1, .inl (.atom info val), rest, h => by + simp only [Sum.elim_inl, syntaxToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseSyntaxOrList?Aux] + have hi : f ≥ sourceInfoDepth info := by + simp only [Sum.elim_inl, syntaxParseDepth] at h; omega + simp only [parseSourceInfo?Aux_correct info _ f hi] + | f + 1, .inl (.ident info rawVal val preresolved), rest, h => by + simp only [Sum.elim_inl, syntaxToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseSyntaxOrList?Aux] + simp only [Sum.elim_inl, syntaxParseDepth] at h + -- Manually decompose the four-arm max: f + 1 ≥ max info (max raw (max name pre)) + 1. + have hi : f ≥ sourceInfoDepth info := by + have := Nat.le_max_left (sourceInfoDepth info) + (max (substringRawDepth rawVal) + (max (nameDepth val) (preresolvedListDepth preresolved))) + omega + have hr : f ≥ substringRawDepth rawVal := by + have h1 := Nat.le_max_right (sourceInfoDepth info) + (max (substringRawDepth rawVal) + (max (nameDepth val) (preresolvedListDepth preresolved))) + have h2 := Nat.le_max_left (substringRawDepth rawVal) + (max (nameDepth val) (preresolvedListDepth preresolved)) + omega + have hn : f ≥ nameDepth val := by + have h1 := Nat.le_max_right (sourceInfoDepth info) + (max (substringRawDepth rawVal) + (max (nameDepth val) (preresolvedListDepth preresolved))) + have h2 := Nat.le_max_right (substringRawDepth rawVal) + (max (nameDepth val) (preresolvedListDepth preresolved)) + have h3 := Nat.le_max_left (nameDepth val) (preresolvedListDepth preresolved) + omega + have hp : f ≥ preresolvedListDepth preresolved := by + have h1 := Nat.le_max_right (sourceInfoDepth info) + (max (substringRawDepth rawVal) + (max (nameDepth val) (preresolvedListDepth preresolved))) + have h2 := Nat.le_max_right (substringRawDepth rawVal) + (max (nameDepth val) (preresolvedListDepth preresolved)) + have h3 := Nat.le_max_right (nameDepth val) (preresolvedListDepth preresolved) + omega + simp only [parseSourceInfo?Aux_correct info _ f hi, + parseSubstringRaw?Aux_correct rawVal _ f hr, + parseName?Aux_correct val _ f hn, + parsePreresolvedList?Aux_correct preresolved _ f hp] + | f + 1, .inl (.node info kind args), rest, h => by + simp only [Sum.elim_inl, syntaxToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseSyntaxOrList?Aux] + simp only [Sum.elim_inl, syntaxParseDepth] at h + have hi : f ≥ sourceInfoDepth info := by + have := Nat.le_max_left (sourceInfoDepth info) + (max (nameDepth kind) (syntaxListParseDepth args.toList)) + omega + have hk : f ≥ nameDepth kind := by + have h1 := Nat.le_max_right (sourceInfoDepth info) + (max (nameDepth kind) (syntaxListParseDepth args.toList)) + have h2 := Nat.le_max_left (nameDepth kind) (syntaxListParseDepth args.toList) + omega + have hargs_d : f ≥ syntaxListParseDepth args.toList := by + have h1 := Nat.le_max_right (sourceInfoDepth info) + (max (nameDepth kind) (syntaxListParseDepth args.toList)) + have h2 := Nat.le_max_right (nameDepth kind) (syntaxListParseDepth args.toList) + omega + have ih := parseSyntaxOrList?Aux_correct f (.inr args.toList) + (Token.rparen :: rest) (by simp only [Sum.elim_inr]; exact hargs_d) + simp only [Sum.elim_inr] at ih + simp only [parseSourceInfo?Aux_correct info _ f hi, + parseName?Aux_correct kind _ f hk, ih, Array.toArray_toList] + | f + 1, .inr [], rest, _ => by + simp [syntaxListToTokens, parseSyntaxOrList?Aux] + | f + 1, .inr (s :: tail), rest, h => by + simp only [Sum.elim_inr, syntaxListToTokens, List.append_assoc, List.cons_append, + List.nil_append, parseSyntaxOrList?Aux] + simp only [Sum.elim_inr, syntaxListParseDepth] at h + have hs_d : f ≥ syntaxParseDepth s := by + have := Nat.le_max_left (syntaxParseDepth s) (syntaxListParseDepth tail) + omega + have ht_d : f ≥ syntaxListParseDepth tail := by + have := Nat.le_max_right (syntaxParseDepth s) (syntaxListParseDepth tail) + omega + have ihs := parseSyntaxOrList?Aux_correct f (.inl s) + (syntaxListToTokens tail ++ Token.rparen :: rest) + (by simp only [Sum.elim_inl]; exact hs_d) + have iht := parseSyntaxOrList?Aux_correct f (.inr tail) + (Token.rparen :: rest) + (by simp only [Sum.elim_inr]; exact ht_d) + simp only [Sum.elim_inl] at ihs + simp only [Sum.elim_inr] at iht + rw [ihs] + simp only [iht] + +/-- Specialisation: round-trip for `parseSyntax?Aux` projection. -/ +theorem parseSyntax?Aux_correct : + ∀ (s : Lean.Syntax) (rest : List Token) (fuel : Nat), + fuel ≥ syntaxParseDepth s → + parseSyntax?Aux fuel (syntaxToTokens s ++ rest) = some (s, rest) := by + intro s rest fuel h + show (match parseSyntaxOrList?Aux fuel true (syntaxToTokens s ++ rest) with + | some (.inl s, r) => some (s, r) + | _ => none) = some (s, rest) + have := parseSyntaxOrList?Aux_correct fuel (.inl s) rest + (by simp only [Sum.elim_inl]; exact h) + simp only [Sum.elim_inl] at this + rw [this] + theorem parseArtifact?Aux_correct : ∀ (a : MetaArtifact) (rest : List Token) (fuel : Nat), a.supported = true → @@ -635,8 +1228,16 @@ theorem parseArtifact?Aux_correct : List.nil_append, parseArtifact?Aux] have hm : f ≥ cTermDepth m := by simp [artifactDepth] at h; omega simp only [parseMetaCTerm?Aux_correct m _ f hm] - | .declAt _, _, _, hsup, _ => by - simp [MetaArtifact.supported] at hsup + | .declAt s, rest, fuel, _, h => by + rcases fuel with _ | f + · simp [artifactDepth] at h + · simp only [MetaArtifact.toTokens, List.append_assoc, List.cons_append, + List.nil_append, parseArtifact?Aux] + -- The inner parser is now called at `f` fuel (we'll fix the + -- parser body to use `f` rather than `syntaxFuelCap` once below). + have hs : f ≥ syntaxParseDepth s := by + simp [artifactDepth] at h; omega + rw [parseSyntax?Aux_correct s (Token.rparen :: rest) f hs] -- ── Token-level round-trip — the canonical universal theorems ───────────── -- These follow directly from the `parser?Aux_correct` lemmas above @@ -719,6 +1320,86 @@ theorem cTermToTokens_length_bound (t : MetaCTerm) : have := classifierToTokens_length_bound φ omega +-- Length-vs-depth for the helper-syntax encoders. + +theorem stringPosRawToTokens_length_bound (p : String.Pos.Raw) : + (stringPosRawToTokens p).length + 1 ≥ stringPosRawDepth p := by + simp [stringPosRawToTokens, stringPosRawDepth] + +theorem substringRawToTokens_length_bound (s : Substring.Raw) : + (substringRawToTokens s).length + 1 ≥ substringRawDepth s := by + simp [substringRawToTokens, substringRawDepth, stringPosRawToTokens] + +theorem sourceInfoToTokens_length_bound (info : Lean.SourceInfo) : + (sourceInfoToTokens info).length + 1 ≥ sourceInfoDepth info := by + cases info <;> + simp [sourceInfoToTokens, sourceInfoDepth, substringRawToTokens, + stringPosRawToTokens, boolToTokens] + +theorem stringListToTokens_length_bound (l : List String) : + (stringListToTokens l).length + 1 ≥ stringListDepth l := by + induction l with + | nil => simp [stringListToTokens, stringListDepth] + | cons s rest ih => simp [stringListToTokens, stringListDepth]; omega + +theorem preresolvedToTokens_length_bound : + ∀ (p : Lean.Syntax.Preresolved), + (preresolvedToTokens p).length + 1 ≥ preresolvedDepth p + | .namespace ns => by + simp [preresolvedToTokens, preresolvedDepth] + have := nameToTokens_length_bound ns; omega + | .decl n fields => by + simp [preresolvedToTokens, preresolvedDepth] + have h1 := nameToTokens_length_bound n + have h2 := stringListToTokens_length_bound fields + omega + +theorem preresolvedListToTokens_length_bound (l : List Lean.Syntax.Preresolved) : + (preresolvedListToTokens l).length + 1 ≥ preresolvedListDepth l := by + induction l with + | nil => simp [preresolvedListToTokens, preresolvedListDepth] + | cons p rest ih => + simp [preresolvedListToTokens, preresolvedListDepth] + have := preresolvedToTokens_length_bound p + omega + +-- Length-vs-depth for the syntax encoder. Joint mutual recursion +-- because the encoder and depth function are themselves mutually +-- recursive on `Lean.Syntax` / `List Lean.Syntax`. +mutual + /-- Length bound on a `Lean.Syntax` encoding. -/ + theorem syntaxToTokens_length_bound : + ∀ (s : Lean.Syntax), (syntaxToTokens s).length + 1 ≥ syntaxParseDepth s + | .missing => by simp [syntaxToTokens, syntaxParseDepth] + | .atom info _ => by + simp [syntaxToTokens, syntaxParseDepth] + have := sourceInfoToTokens_length_bound info + omega + | .ident info rawVal val preresolved => by + simp [syntaxToTokens, syntaxParseDepth] + have h1 := sourceInfoToTokens_length_bound info + have h2 := substringRawToTokens_length_bound rawVal + have h3 := nameToTokens_length_bound val + have h4 := preresolvedListToTokens_length_bound preresolved + omega + | .node info kind args => by + simp [syntaxToTokens, syntaxParseDepth] + have h1 := sourceInfoToTokens_length_bound info + have h2 := nameToTokens_length_bound kind + have h3 := syntaxListToTokens_length_bound args.toList + omega + + /-- Length bound on a `List Lean.Syntax` encoding. -/ + theorem syntaxListToTokens_length_bound : + ∀ (l : List Lean.Syntax), (syntaxListToTokens l).length + 1 ≥ syntaxListParseDepth l + | [] => by simp [syntaxListToTokens, syntaxListParseDepth] + | s :: rest => by + simp [syntaxListToTokens, syntaxListParseDepth] + have h1 := syntaxToTokens_length_bound s + have h2 := syntaxListToTokens_length_bound rest + omega +end + /-- Every meta-mirror value's canonical token form parses back to the same value. Universal — by structural induction on the meta-mirror type, with sufficient fuel guaranteed by the @@ -756,7 +1437,10 @@ theorem artifactFromTokens?_round_trip (a : MetaArtifact) cases a with | empty => simp [MetaArtifact.toTokens, artifactDepth] | source s => simp [MetaArtifact.toTokens, artifactDepth] - | declAt _ => simp [MetaArtifact.supported] at hsup + | declAt s => + simp [MetaArtifact.toTokens, artifactDepth] + have := syntaxToTokens_length_bound s + omega | cterm m => simp [MetaArtifact.toTokens, artifactDepth] have := cTermToTokens_length_bound m; omega