From 2412d52536d90a088d9df10c2bc05b8acff600d1 Mon Sep 17 00:00:00 2001 From: Kyle Miller Date: Wed, 23 Jul 2025 13:57:06 -0700 Subject: [PATCH] feat: add hygiene info to paren/tuple/typeAscription syntaxes (#9491) This PR adds hygiene info to paren/tuple/typeAscription syntaxes, which will be used to implement hygienic cdot function expansion in #9443. --- src/Lean/Elab/BuiltinNotation.lean | 18 +++++------ src/Lean/Elab/Extra.lean | 2 +- src/Lean/Elab/Notation.lean | 2 +- src/Lean/Elab/Quotation/Precheck.lean | 9 ++++-- src/Lean/Elab/Term.lean | 4 +-- src/Lean/Parser/Extra.lean | 18 ++++++----- src/Lean/Parser/Term.lean | 30 ++++++++++++++----- stage0/src/stdlib_flags.h | 2 +- tests/lean/StxQuot.lean.expected.out | 4 +-- .../lean/interactive/hover.lean.expected.out | 2 +- tests/lean/notationDelab.lean | 7 +++++ tests/lean/run/Reparen.lean | 3 ++ 12 files changed, 68 insertions(+), 33 deletions(-) diff --git a/src/Lean/Elab/BuiltinNotation.lean b/src/Lean/Elab/BuiltinNotation.lean index 0243a21995..1e77c666cd 100644 --- a/src/Lean/Elab/BuiltinNotation.lean +++ b/src/Lean/Elab/BuiltinNotation.lean @@ -290,8 +290,8 @@ where extra state, and return it. Otherwise, we just return `stx`. -/ go : Syntax → StateT (Array Ident) MacroM Syntax - | stx@`(($(_))) => pure stx - | stx@`(·) => do + | stx@`($_:hygienicLParen$(_))) => pure stx + | stx@`($_:cdot) => do let name ← MonadQuotation.addMacroScope <| Name.mkSimple s!"x{(← get).size + 1}" let id := mkIdentFrom stx name (canonical := true) modify (fun s => s.push id) @@ -347,33 +347,33 @@ def elabCDotFunctionAlias? (stx : Term) : TermElabM (Option Expr) := do where expandCDotArg? (stx : Term) : MacroM (Option Term) := match stx with - | `(($e)) => Term.expandCDot? e + | `($_:hygienicLParen$e)) => Term.expandCDot? e | _ => Term.expandCDot? stx @[builtin_macro Lean.Parser.Term.paren] def expandParen : Macro - | `(($e)) => return (← expandCDot? e).getD e + | `($_:hygienicLParen$e)) => return (← expandCDot? e).getD e | _ => Macro.throwUnsupported @[builtin_macro Lean.Parser.Term.tuple] def expandTuple : Macro - | `(()) => ``(Unit.unit) - | `(($e, $es,*)) => do + | `($_:hygienicLParen)) => ``(Unit.unit) + | `($_:hygienicLParen $e, $es,*)) => do let pairs ← mkPairs (#[e] ++ es) return (← expandCDot? pairs).getD pairs | _ => Macro.throwUnsupported @[builtin_macro Lean.Parser.Term.typeAscription] def expandTypeAscription : Macro - | `(($e : $(type)?)) => do + | `($_:hygienicLParen$e : $(type)?)) => do match (← expandCDot? e) with | some e => `(($e : $(type)?)) | none => Macro.throwUnsupported | _ => Macro.throwUnsupported @[builtin_term_elab typeAscription] def elabTypeAscription : TermElab - | `(($e : $type)), _ => do + | `($_:hygienicLParen$e : $type)), _ => do let type ← withSynthesize (postpone := .yes) <| elabType type let e ← elabTerm e type ensureHasType type e - | `(($e :)), expectedType? => do + | `($_:hygienicLParen$e :)), expectedType? => do let e ← withSynthesize (postpone := .no) <| elabTerm e none ensureHasType expectedType? e | _, _ => throwUnsupportedSyntax diff --git a/src/Lean/Elab/Extra.lean b/src/Lean/Elab/Extra.lean index b9791d6de5..56854d9bef 100644 --- a/src/Lean/Elab/Extra.lean +++ b/src/Lean/Elab/Extra.lean @@ -194,7 +194,7 @@ where | `(unop% $f $arg) => processUnOp s f arg | `(leftact% $f $lhs $rhs) => processBinOp s .leftact f lhs rhs | `(rightact% $f $lhs $rhs) => processBinOp s .rightact f lhs rhs - | `(($e)) => + | `($_:hygienicLParen $e)) => if hasCDot e then processLeaf s else diff --git a/src/Lean/Elab/Notation.lean b/src/Lean/Elab/Notation.lean index 69dd1c239f..ba6fc5ab3b 100644 --- a/src/Lean/Elab/Notation.lean +++ b/src/Lean/Elab/Notation.lean @@ -62,7 +62,7 @@ def removeParenthesesAux (parens body : Syntax) : Syntax := partial def removeParentheses (stx : Syntax) : MacroM Syntax := do match stx with - | `(($e)) => pure $ removeParenthesesAux stx (←removeParentheses $ (←Term.expandCDot? e).getD e) + | `($_:hygienicLParen $e)) => pure $ removeParenthesesAux stx (←removeParentheses $ (←Term.expandCDot? e).getD e) | _ => match stx with | .node info kind args => pure $ .node info kind (←args.mapM removeParentheses) diff --git a/src/Lean/Elab/Quotation/Precheck.lean b/src/Lean/Elab/Quotation/Precheck.lean index 49d85f7c8f..19245327d1 100644 --- a/src/Lean/Elab/Quotation/Precheck.lean +++ b/src/Lean/Elab/Quotation/Precheck.lean @@ -73,6 +73,8 @@ where | stx => if stx.isAnyAntiquot then false + else if stx.isOfKind hygieneInfoKind then + false else stx.getArgs.any hasQuotedIdent @@ -122,10 +124,10 @@ private def isSectionVariable (e : Expr) : TermElabM Bool := do | _ => throwUnsupportedSyntax @[builtin_quot_precheck Lean.Parser.Term.typeAscription] def precheckTypeAscription : Precheck - | `(($e : $type)) => do + | `($_:hygienicLParen $e : $type)) => do precheck e precheck type - | `(($e :)) => precheck e + | `($_:hygienicLParen $e :)) => precheck e | _ => throwUnsupportedSyntax @[builtin_quot_precheck Lean.Parser.Term.explicit] def precheckExplicit : Precheck @@ -176,6 +178,9 @@ section ExpressionTree | `(unop% $f $a) => do precheck f; precheck a | _ => throwUnsupportedSyntax +@[builtin_quot_precheck Lean.Parser.Term.hygieneInfo] def precheckHygieneInfo : Precheck + | _ => pure () + end ExpressionTree end Lean.Elab.Term.Quotation diff --git a/src/Lean/Elab/Term.lean b/src/Lean/Elab/Term.lean index 67d4f6af79..1a100df371 100644 --- a/src/Lean/Elab/Term.lean +++ b/src/Lean/Elab/Term.lean @@ -1503,7 +1503,7 @@ private def isLambdaWithImplicit (stx : Syntax) : Bool := private partial def dropTermParens : Syntax → Syntax := fun stx => match stx with - | `(($stx)) => dropTermParens stx + | `($_:hygienicLParen $stx)) => dropTermParens stx | _ => stx private def isHole (stx : Syntax) : Bool := @@ -1521,7 +1521,7 @@ private def isNoImplicitLambda (stx : Syntax) : Bool := private def isTypeAscription (stx : Syntax) : Bool := match stx with - | `(($_ : $[$_]?)) => true + | `($_:hygienicLParen $_ : $[$_]?)) => true | _ => false def hasNoImplicitLambdaAnnotation (type : Expr) : Bool := diff --git a/src/Lean/Parser/Extra.lean b/src/Lean/Parser/Extra.lean index 7c2f39b918..287e76f57d 100644 --- a/src/Lean/Parser/Extra.lean +++ b/src/Lean/Parser/Extra.lean @@ -84,18 +84,22 @@ You can use `TSyntax.getId` to extract the name from the resulting syntax object @[run_builtin_parser_attribute_hooks, builtin_doc] def rawIdent : Parser := withAntiquot (mkAntiquot "ident" identKind) rawIdentNoAntiquot -/-- The parser `hygieneInfo` parses no text, but captures the current macro scope information -as though it parsed an identifier at the current position. It returns a `hygieneInfoKind` node -around an `.ident` which is `Name.anonymous` but with macro scopes like a regular identifier. +/-- +The parser `hygieneInfo` parses no text, but creates a `hygineInfoKind` node +containing an anonymous identifier as if it were parsed at the current position. +This identifier is modified by syntax quotations to add macro scopes like a regular identifier. This is used to implement `have := ...` syntax: the `hygieneInfo` between the `have` and `:=` -substitutes for the identifier which would normally go there as in `have x :=`, so that we -can expand `have :=` to `have this :=` while retaining the usual macro name resolution behavior. +collects macro scopes, which we can apply to `this` when expanding to `have this := ...`. See [the language reference](lean-manual://section/macro-hygiene) for more information about macro hygiene. -This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier. -You can use `TSyntax.getHygieneInfo` to extract the name from the resulting syntax object. -/ +This is also used to implement cdot functions such as `(1 + ·)`. The opening parenthesis contains +a `hygieneInfo` node as does the cdot, which lets cdot expansion hygienically associate parentheses to cdots. + +This parser has arity 1: it produces a `hygieneInfoKind` node containing an anonymous `Syntax.ident`. +You can use `HygieneInfo.mkIdent` to create an `Ident` from the syntax object, +but you can also use `TSyntax.getHygieneInfo` to get the raw name from the identifier. -/ @[run_builtin_parser_attribute_hooks, builtin_doc] def hygieneInfo : Parser := withAntiquot (mkAntiquot "hygieneInfo" hygieneInfoKind (anonymous := false)) hygieneInfoNoAntiquot diff --git a/src/Lean/Parser/Term.lean b/src/Lean/Parser/Term.lean index acede7a2eb..45b0228b22 100644 --- a/src/Lean/Parser/Term.lean +++ b/src/Lean/Parser/Term.lean @@ -237,12 +237,28 @@ See also the `sorry` tactic, which is short for `exact sorry`. -/ @[builtin_term_parser] def «sorry» := leading_parser "sorry" +-- Left parenthesis with hygiene info, for cdot function expansion. +-- This is a pseudokind for bootstrapping purposes. +def hygienicLParen : Parser := + withAntiquot (mkAntiquot "hygienicLParen" decl_name% (anonymous := false) (isPseudoKind := true)) <| + leadingNode decl_name% (eval_prec max) ("(" >> hygieneInfo) +-- TODO(kmill): remove this formatter after stage0 update +open PrettyPrinter.Formatter Syntax.MonadTraverser in +@[combinator_formatter Lean.Parser.Term.hygienicLParen] +def hygienicLParen.formatter : PrettyPrinter.Formatter := do + let info := (← getCur).getHeadInfo + withMaybeTag info.getPos? (pushToken info "(" false) + goLeft +@[combinator_parenthesizer Lean.Parser.Term.hygienicLParen] +def hygienicLParen.parenthesizer : PrettyPrinter.Parenthesizer := do + PrettyPrinter.Parenthesizer.visitToken /-- A placeholder for an implicit lambda abstraction's variable. The lambda abstraction is scoped to the surrounding parentheses. -For example, `(· + ·)` is equivalent to `fun x y => x + y`. +For example, `(· + ·)` is equivalent to `fun x y => x + y`. Tuple notation and type ascription notation also serve as scopes. +Note that `(· : ty)` expands to `((fun x => x) : ty)`, so `ty` should be a function type. -/ -@[builtin_term_parser] def cdot := leading_parser - symbol "·" <|> "." +@[builtin_term_parser] def cdot := leading_parser + unicodeSymbol "·" "." >> hygieneInfo /-- Type ascription notation: `(0 : Int)` instructs Lean to process `0` as a value of type `Int`. An empty type ascription `(e :)` elaborates `e` without the expected type. @@ -250,11 +266,11 @@ This is occasionally useful when Lean's heuristics for filling arguments from th do not yield the right result. -/ @[builtin_term_parser] def typeAscription := leading_parser - "(" >> (withoutPosition (withoutForbidden (termParser >> " :" >> optional (ppSpace >> termParser)))) >> ")" + hygienicLParen >> (withoutPosition (withoutForbidden (termParser >> " :" >> optional (ppSpace >> termParser)))) >> ")" /-- Tuple notation; `()` is short for `Unit.unit`, `(a, b, c)` for `Prod.mk a (Prod.mk b c)`, etc. -/ @[builtin_term_parser] def tuple := leading_parser - "(" >> optional (withoutPosition (withoutForbidden (termParser >> ", " >> sepBy1 termParser ", " (allowTrailingSep := true)))) >> ")" + hygienicLParen >> optional (withoutPosition (withoutForbidden (termParser >> ", " >> sepBy1 termParser ", " (allowTrailingSep := true)))) >> ")" recommended_spelling "mk" for "(a, b)" in [Prod.mk, tuple] @@ -265,10 +281,10 @@ Can also be used for creating simple functions when combined with `·`. Here are - `(· + ·)` is shorthand for `fun x y => x + y` - `(f · a b)` is shorthand for `fun x => f x a b` - `(h (· + 1) ·)` is shorthand for `fun x => h (fun y => y + 1) x` - - also applies to other parentheses-like notations such as `(·, 1)` + - also applies to other parentheses-like notations such as `(·, 1)` and `(· : Nat → Nat)` -/ @[builtin_term_parser] def paren := leading_parser - "(" >> withoutPosition (withoutForbidden (ppDedentIfGrouped termParser)) >> ")" + hygienicLParen >> withoutPosition (withoutForbidden (ppDedentIfGrouped termParser)) >> ")" /-- The *anonymous constructor* `⟨e, ...⟩` is equivalent to `c e ...` if the expected type is an inductive type with a single constructor `c`. diff --git a/stage0/src/stdlib_flags.h b/stage0/src/stdlib_flags.h index 79a0e58edd..b935a80ab1 100644 --- a/stage0/src/stdlib_flags.h +++ b/stage0/src/stdlib_flags.h @@ -19,7 +19,7 @@ options get_default_options() { opts = opts.update({"internal", "parseQuotWithCurrentStage"}, true); // changes to builtin parsers may also require toggling the following option if macros/syntax // with custom precheck hooks were affected - opts = opts.update({"quotPrecheck"}, true); + opts = opts.update({"quotPrecheck"}, false); opts = opts.update({"pp", "rawOnError"}, true); #endif diff --git a/tests/lean/StxQuot.lean.expected.out b/tests/lean/StxQuot.lean.expected.out index a8bba34230..084fa32053 100644 --- a/tests/lean/StxQuot.lean.expected.out +++ b/tests/lean/StxQuot.lean.expected.out @@ -24,8 +24,8 @@ StxQuot.lean:19:15-19:16: error: unexpected token ']'; expected term 0 1 "1" -"(Term.fun\n \"fun\"\n (Term.basicFun\n [`a._@.UnhygienicMain._hyg.1\n (Term.typeAscription \"(\" `b._@.UnhygienicMain._hyg.1 \":\" [`Nat._@.UnhygienicMain._hyg.1] \")\")]\n []\n \"=>\"\n (num \"1\")))" -"#[(Term.typeAscription \"(\" `a._@.UnhygienicMain._hyg.1 \":\" [`Nat._@.UnhygienicMain._hyg.1] \")\"), `b._@.UnhygienicMain._hyg.1]" +"(Term.fun\n \"fun\"\n (Term.basicFun\n [`a._@.UnhygienicMain._hyg.1\n (Term.typeAscription\n (Term.hygienicLParen \"(\" (hygieneInfo `_@.UnhygienicMain._hyg.1))\n `b._@.UnhygienicMain._hyg.1\n \":\"\n [`Nat._@.UnhygienicMain._hyg.1]\n \")\")]\n []\n \"=>\"\n (num \"1\")))" +"#[(Term.typeAscription\n (Term.hygienicLParen \"(\" (hygieneInfo `_@.UnhygienicMain._hyg.1))\n `a._@.UnhygienicMain._hyg.1\n \":\"\n [`Nat._@.UnhygienicMain._hyg.1]\n \")\"), `b._@.UnhygienicMain._hyg.1]" "`a._@.UnhygienicMain._hyg.1" "(Term.forall \"∀\" [(Term.hole \"_\")] [] \",\" `c._@.UnhygienicMain._hyg.1)" "(Term.hole \"_\")" diff --git a/tests/lean/interactive/hover.lean.expected.out b/tests/lean/interactive/hover.lean.expected.out index af5aad8515..2d5f8b7910 100644 --- a/tests/lean/interactive/hover.lean.expected.out +++ b/tests/lean/interactive/hover.lean.expected.out @@ -394,7 +394,7 @@ "end": {"line": 203, "character": 32}}, "contents": {"value": - "```lean\nId ℕ\n```\n***\nParentheses, used for grouping expressions (e.g., `a * (b + c)`).\nCan also be used for creating simple functions when combined with `·`. Here are some examples:\n - `(· + 1)` is shorthand for `fun x => x + 1`\n - `(· + ·)` is shorthand for `fun x y => x + y`\n - `(f · a b)` is shorthand for `fun x => f x a b`\n - `(h (· + 1) ·)` is shorthand for `fun x => h (fun y => y + 1) x`\n - also applies to other parentheses-like notations such as `(·, 1)`\n", + "```lean\nId ℕ\n```\n***\nParentheses, used for grouping expressions (e.g., `a * (b + c)`).\nCan also be used for creating simple functions when combined with `·`. Here are some examples:\n - `(· + 1)` is shorthand for `fun x => x + 1`\n - `(· + ·)` is shorthand for `fun x y => x + y`\n - `(f · a b)` is shorthand for `fun x => f x a b`\n - `(h (· + 1) ·)` is shorthand for `fun x => h (fun y => y + 1) x`\n - also applies to other parentheses-like notations such as `(·, 1)` and `(· : Nat → Nat)`\n", "kind": "markdown"}} {"textDocument": {"uri": "file:///hover.lean"}, "position": {"line": 206, "character": 8}} diff --git a/tests/lean/notationDelab.lean b/tests/lean/notationDelab.lean index 3476d0d155..f9cb9033bd 100644 --- a/tests/lean/notationDelab.lean +++ b/tests/lean/notationDelab.lean @@ -1,3 +1,10 @@ +-- TODO(kmill) remove after stage0 update +@[app_unexpander Unit.unit] meta def unexpandUnit' : Lean.PrettyPrinter.Unexpander + | `($(_)) => `(()) +@[app_unexpander Prod.mk] meta def unexpandProdMk' : Lean.PrettyPrinter.Unexpander + | `($(_) $x ($y, $ys,*)) => `(($x, $y, $ys,*)) + | `($(_) $x $y) => `(($x, $y)) + | _ => throw () notation "unitTest " x => Prod.mk x () #check unitTest 42 diff --git a/tests/lean/run/Reparen.lean b/tests/lean/run/Reparen.lean index e8c5f36ec9..7c30c4d66b 100644 --- a/tests/lean/run/Reparen.lean +++ b/tests/lean/run/Reparen.lean @@ -4,6 +4,9 @@ import Lean.Parser open Lean open Std.Format open Std +-- TODO(kmill): re-enable after stage0 update +#exit + def unparenAux (parens body : Syntax) : Syntax := match parens.getHeadInfo, body.getHeadInfo, body.getTailInfo, parens.getTailInfo with | SourceInfo.original lead _ _ _, SourceInfo.original _ pos trail pos',