feat: add hygiene info to paren/tuple/typeAscription syntaxes (#9491)

This PR adds hygiene info to paren/tuple/typeAscription syntaxes, which will be used to implement hygienic cdot function expansion in #9443.
2025-07-23 13:57:06 -07:00 · 2025-07-23 13:57:06 -07:00 · 2412d52536
commit 2412d52536
parent e686d040ea
12 changed files with 68 additions and 33 deletions
--- a/src/Lean/Elab/BuiltinNotation.lean
+++ b/src/Lean/Elab/BuiltinNotation.lean
@ -290,8 +290,8 @@ where
  extra state, and return it. Otherwise, we just return `stx`.
  -/
  go : Syntax → StateT (Array Ident) MacroM Syntax
-  | stx@`(($(_))) => pure stx
-  | stx@`(·) => do
+  | stx@`($_:hygienicLParen$(_))) => pure stx
+  | stx@`($_:cdot) => do
    let name ← MonadQuotation.addMacroScope <| Name.mkSimple s!"x{(← get).size + 1}"
    let id := mkIdentFrom stx name (canonical := true)
    modify (fun s => s.push id)
@ -347,33 +347,33 @@ def elabCDotFunctionAlias? (stx : Term) : TermElabM (Option Expr) := do
 where
  expandCDotArg? (stx : Term) : MacroM (Option Term) :=
    match stx with
-    | `(($e)) => Term.expandCDot? e
+    | `($_:hygienicLParen$e)) => Term.expandCDot? e
    | _ => Term.expandCDot? stx

@[builtin_macro Lean.Parser.Term.paren] def expandParen : Macro
-  | `(($e)) => return (← expandCDot? e).getD e
+  | `($_:hygienicLParen$e)) => return (← expandCDot? e).getD e
  | _       => Macro.throwUnsupported

@[builtin_macro Lean.Parser.Term.tuple] def expandTuple : Macro
-  | `(()) => ``(Unit.unit)
-  | `(($e, $es,*)) => do
+  | `($_:hygienicLParen)) => ``(Unit.unit)
+  | `($_:hygienicLParen $e, $es,*)) => do
    let pairs ← mkPairs (#[e] ++ es)
    return (← expandCDot? pairs).getD pairs
  | _ => Macro.throwUnsupported

@[builtin_macro Lean.Parser.Term.typeAscription] def expandTypeAscription : Macro
-  | `(($e : $(type)?)) => do
+  | `($_:hygienicLParen$e : $(type)?)) => do
    match (← expandCDot? e) with
    | some e => `(($e : $(type)?))
    | none   => Macro.throwUnsupported
  | _ => Macro.throwUnsupported

@[builtin_term_elab typeAscription] def elabTypeAscription : TermElab
-  | `(($e : $type)), _ => do
+  | `($_:hygienicLParen$e : $type)), _ => do
    let type ← withSynthesize (postpone := .yes) <| elabType type
    let e ← elabTerm e type
    ensureHasType type e
-  | `(($e :)), expectedType? => do
+  | `($_:hygienicLParen$e :)), expectedType? => do
    let e ← withSynthesize (postpone := .no) <| elabTerm e none
    ensureHasType expectedType? e
  | _, _ => throwUnsupportedSyntax
--- a/src/Lean/Elab/Extra.lean
+++ b/src/Lean/Elab/Extra.lean
@ -194,7 +194,7 @@ where
    | `(unop% $f $arg) => processUnOp s f arg
    | `(leftact% $f $lhs $rhs) => processBinOp s .leftact f lhs rhs
    | `(rightact% $f $lhs $rhs) => processBinOp s .rightact f lhs rhs
-    | `(($e)) =>
+    | `($_:hygienicLParen $e)) =>
      if hasCDot e then
        processLeaf s
      else
--- a/src/Lean/Elab/Notation.lean
+++ b/src/Lean/Elab/Notation.lean
@ -62,7 +62,7 @@ def removeParenthesesAux (parens body : Syntax) : Syntax :=

 partial def removeParentheses (stx : Syntax) : MacroM Syntax := do
  match stx with
-  | `(($e)) => pure $ removeParenthesesAux stx (←removeParentheses $ (←Term.expandCDot? e).getD e)
+  | `($_:hygienicLParen $e)) => pure $ removeParenthesesAux stx (←removeParentheses $ (←Term.expandCDot? e).getD e)
  | _ =>
    match stx with
    | .node info kind args => pure $ .node info kind (←args.mapM removeParentheses)
--- a/src/Lean/Elab/Quotation/Precheck.lean
+++ b/src/Lean/Elab/Quotation/Precheck.lean
@ -73,6 +73,8 @@ where
    | stx =>
      if stx.isAnyAntiquot then
        false
+      else if stx.isOfKind hygieneInfoKind then
+        false
      else
        stx.getArgs.any hasQuotedIdent

@ -122,10 +124,10 @@ private def isSectionVariable (e : Expr) : TermElabM Bool := do
  | _ => throwUnsupportedSyntax

@[builtin_quot_precheck Lean.Parser.Term.typeAscription] def precheckTypeAscription : Precheck
-  | `(($e : $type)) => do
+  | `($_:hygienicLParen $e : $type)) => do
    precheck e
    precheck type
-  | `(($e :)) => precheck e
+  | `($_:hygienicLParen $e :)) => precheck e
  | _ => throwUnsupportedSyntax

@[builtin_quot_precheck Lean.Parser.Term.explicit] def precheckExplicit : Precheck
@ -176,6 +178,9 @@ section ExpressionTree
  | `(unop% $f $a) => do precheck f; precheck a
  | _ => throwUnsupportedSyntax

+@[builtin_quot_precheck Lean.Parser.Term.hygieneInfo] def precheckHygieneInfo : Precheck
+  | _ => pure ()
+
 end ExpressionTree

 end Lean.Elab.Term.Quotation
--- a/src/Lean/Elab/Term.lean
+++ b/src/Lean/Elab/Term.lean
@ -1503,7 +1503,7 @@ private def isLambdaWithImplicit (stx : Syntax) : Bool :=

 private partial def dropTermParens : Syntax → Syntax := fun stx =>
  match stx with
-  | `(($stx)) => dropTermParens stx
+  | `($_:hygienicLParen $stx)) => dropTermParens stx
  | _         => stx

 private def isHole (stx : Syntax) : Bool :=
@ -1521,7 +1521,7 @@ private def isNoImplicitLambda (stx : Syntax) : Bool :=

 private def isTypeAscription (stx : Syntax) : Bool :=
  match stx with
-  | `(($_ : $[$_]?)) => true
+  | `($_:hygienicLParen $_ : $[$_]?)) => true
  | _                => false

 def hasNoImplicitLambdaAnnotation (type : Expr) : Bool :=
--- a/src/Lean/Parser/Extra.lean
+++ b/src/Lean/Parser/Extra.lean
@ -84,18 +84,22 @@ You can use `TSyntax.getId` to extract the name from the resulting syntax object
@[run_builtin_parser_attribute_hooks, builtin_doc] def rawIdent : Parser :=
  withAntiquot (mkAntiquot "ident" identKind) rawIdentNoAntiquot

-/-- The parser `hygieneInfo` parses no text, but captures the current macro scope information
-as though it parsed an identifier at the current position. It returns a `hygieneInfoKind` node
-around an `.ident` which is `Name.anonymous` but with macro scopes like a regular identifier.
+/--
+The parser `hygieneInfo` parses no text, but creates a `hygineInfoKind` node
+containing an anonymous identifier as if it were parsed at the current position.
+This identifier is modified by syntax quotations to add macro scopes like a regular identifier.

 This is used to implement `have := ...` syntax: the `hygieneInfo` between the `have` and `:=`
-substitutes for the identifier which would normally go there as in `have x :=`, so that we
-can expand `have :=` to `have this :=` while retaining the usual macro name resolution behavior.
+collects macro scopes, which we can apply to `this` when expanding to `have this := ...`.
 See [the language reference](lean-manual://section/macro-hygiene) for more information about
 macro hygiene.

-This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier.
-You can use `TSyntax.getHygieneInfo` to extract the name from the resulting syntax object. -/
+This is also used to implement cdot functions such as `(1 + ·)`. The opening parenthesis contains
+a `hygieneInfo` node as does the cdot, which lets cdot expansion hygienically associate parentheses to cdots.
+
+This parser has arity 1: it produces a `hygieneInfoKind` node containing an anonymous `Syntax.ident`.
+You can use `HygieneInfo.mkIdent` to create an `Ident` from the syntax object,
+but you can also use `TSyntax.getHygieneInfo` to get the raw name from the identifier. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def hygieneInfo : Parser :=
  withAntiquot (mkAntiquot "hygieneInfo" hygieneInfoKind (anonymous := false)) hygieneInfoNoAntiquot

--- a/src/Lean/Parser/Term.lean
+++ b/src/Lean/Parser/Term.lean
@ -237,12 +237,28 @@ See also the `sorry` tactic, which is short for `exact sorry`.
 -/
@[builtin_term_parser] def «sorry» := leading_parser
  "sorry"
+-- Left parenthesis with hygiene info, for cdot function expansion.
+-- This is a pseudokind for bootstrapping purposes.
+def hygienicLParen : Parser :=
+  withAntiquot (mkAntiquot "hygienicLParen" decl_name% (anonymous := false) (isPseudoKind := true)) <|
+    leadingNode decl_name% (eval_prec max) ("(" >> hygieneInfo)
+-- TODO(kmill): remove this formatter after stage0 update
+open PrettyPrinter.Formatter Syntax.MonadTraverser in
+@[combinator_formatter Lean.Parser.Term.hygienicLParen]
+def hygienicLParen.formatter : PrettyPrinter.Formatter := do
+  let info := (← getCur).getHeadInfo
+  withMaybeTag info.getPos? (pushToken info "(" false)
+  goLeft
+@[combinator_parenthesizer Lean.Parser.Term.hygienicLParen]
+def hygienicLParen.parenthesizer : PrettyPrinter.Parenthesizer := do
+  PrettyPrinter.Parenthesizer.visitToken
 /--
 A placeholder for an implicit lambda abstraction's variable. The lambda abstraction is scoped to the surrounding parentheses.
-For example, `(· + ·)` is equivalent to `fun x y => x + y`.
+For example, `(· + ·)` is equivalent to `fun x y => x + y`. Tuple notation and type ascription notation also serve as scopes.
+Note that `(· : ty)` expands to `((fun x => x) : ty)`, so `ty` should be a function type.
 -/
-@[builtin_term_parser] def cdot   := leading_parser
-  symbol "·" <|> "."
+@[builtin_term_parser] def cdot := leading_parser
+  unicodeSymbol "·" "." >> hygieneInfo
 /--
 Type ascription notation: `(0 : Int)` instructs Lean to process `0` as a value of type `Int`.
 An empty type ascription `(e :)` elaborates `e` without the expected type.
@ -250,11 +266,11 @@ This is occasionally useful when Lean's heuristics for filling arguments from th
 do not yield the right result.
 -/
@[builtin_term_parser] def typeAscription := leading_parser
-  "(" >> (withoutPosition (withoutForbidden (termParser >> " :" >> optional (ppSpace >> termParser)))) >> ")"
+  hygienicLParen >> (withoutPosition (withoutForbidden (termParser >> " :" >> optional (ppSpace >> termParser)))) >> ")"

 /-- Tuple notation; `()` is short for `Unit.unit`, `(a, b, c)` for `Prod.mk a (Prod.mk b c)`, etc. -/
@[builtin_term_parser] def tuple := leading_parser
-  "(" >> optional (withoutPosition (withoutForbidden (termParser >> ", " >> sepBy1 termParser ", " (allowTrailingSep := true)))) >> ")"
+  hygienicLParen >> optional (withoutPosition (withoutForbidden (termParser >> ", " >> sepBy1 termParser ", " (allowTrailingSep := true)))) >> ")"

 recommended_spelling "mk" for "(a, b)" in [Prod.mk, tuple]

@ -265,10 +281,10 @@ Can also be used for creating simple functions when combined with `·`. Here are
  - `(· + ·)` is shorthand for `fun x y => x + y`
  - `(f · a b)` is shorthand for `fun x => f x a b`
  - `(h (· + 1) ·)` is shorthand for `fun x => h (fun y => y + 1) x`
-  - also applies to other parentheses-like notations such as `(·, 1)`
+  - also applies to other parentheses-like notations such as `(·, 1)` and `(· : Nat → Nat)`
 -/
@[builtin_term_parser] def paren := leading_parser
-  "(" >> withoutPosition (withoutForbidden (ppDedentIfGrouped termParser)) >> ")"
+  hygienicLParen >> withoutPosition (withoutForbidden (ppDedentIfGrouped termParser)) >> ")"
 /--
 The *anonymous constructor* `⟨e, ...⟩` is equivalent to `c e ...` if the
 expected type is an inductive type with a single constructor `c`.
--- a/stage0/src/stdlib_flags.h
+++ b/stage0/src/stdlib_flags.h
@ -19,7 +19,7 @@ options get_default_options() {
    opts = opts.update({"internal", "parseQuotWithCurrentStage"}, true);
    // changes to builtin parsers may also require toggling the following option if macros/syntax
    // with custom precheck hooks were affected
-    opts = opts.update({"quotPrecheck"}, true);
+    opts = opts.update({"quotPrecheck"}, false);

    opts = opts.update({"pp", "rawOnError"}, true);
 #endif
--- a/tests/lean/StxQuot.lean.expected.out
+++ b/tests/lean/StxQuot.lean.expected.out
@ -24,8 +24,8 @@ StxQuot.lean:19:15-19:16: error: unexpected token ']'; expected term
 0
 1
 "1"
-"(Term.fun\n \"fun\"\n (Term.basicFun\n  [`a._@.UnhygienicMain._hyg.1\n   (Term.typeAscription \"(\" `b._@.UnhygienicMain._hyg.1 \":\" [`Nat._@.UnhygienicMain._hyg.1] \")\")]\n  []\n  \"=>\"\n  (num \"1\")))"
-"#[(Term.typeAscription \"(\" `a._@.UnhygienicMain._hyg.1 \":\" [`Nat._@.UnhygienicMain._hyg.1] \")\"), `b._@.UnhygienicMain._hyg.1]"
+"(Term.fun\n \"fun\"\n (Term.basicFun\n  [`a._@.UnhygienicMain._hyg.1\n   (Term.typeAscription\n    (Term.hygienicLParen \"(\" (hygieneInfo `_@.UnhygienicMain._hyg.1))\n    `b._@.UnhygienicMain._hyg.1\n    \":\"\n    [`Nat._@.UnhygienicMain._hyg.1]\n    \")\")]\n  []\n  \"=>\"\n  (num \"1\")))"
+"#[(Term.typeAscription\n (Term.hygienicLParen \"(\" (hygieneInfo `_@.UnhygienicMain._hyg.1))\n `a._@.UnhygienicMain._hyg.1\n \":\"\n [`Nat._@.UnhygienicMain._hyg.1]\n \")\"), `b._@.UnhygienicMain._hyg.1]"
 "`a._@.UnhygienicMain._hyg.1"
 "(Term.forall \"∀\" [(Term.hole \"_\")] [] \",\" `c._@.UnhygienicMain._hyg.1)"
 "(Term.hole \"_\")"
--- a/tests/lean/interactive/hover.lean.expected.out
+++ b/tests/lean/interactive/hover.lean.expected.out
@ -394,7 +394,7 @@
  "end": {"line": 203, "character": 32}},
 "contents":
 {"value":
-  "```lean\nId ℕ\n```\n***\nParentheses, used for grouping expressions (e.g., `a * (b + c)`).\nCan also be used for creating simple functions when combined with `·`. Here are some examples:\n  - `(· + 1)` is shorthand for `fun x => x + 1`\n  - `(· + ·)` is shorthand for `fun x y => x + y`\n  - `(f · a b)` is shorthand for `fun x => f x a b`\n  - `(h (· + 1) ·)` is shorthand for `fun x => h (fun y => y + 1) x`\n  - also applies to other parentheses-like notations such as `(·, 1)`\n",
+  "```lean\nId ℕ\n```\n***\nParentheses, used for grouping expressions (e.g., `a * (b + c)`).\nCan also be used for creating simple functions when combined with `·`. Here are some examples:\n  - `(· + 1)` is shorthand for `fun x => x + 1`\n  - `(· + ·)` is shorthand for `fun x y => x + y`\n  - `(f · a b)` is shorthand for `fun x => f x a b`\n  - `(h (· + 1) ·)` is shorthand for `fun x => h (fun y => y + 1) x`\n  - also applies to other parentheses-like notations such as `(·, 1)` and `(· : Nat → Nat)`\n",
  "kind": "markdown"}}
 {"textDocument": {"uri": "file:///hover.lean"},
 "position": {"line": 206, "character": 8}}
--- a/tests/lean/notationDelab.lean
+++ b/tests/lean/notationDelab.lean
@ -1,3 +1,10 @@
+-- TODO(kmill) remove after stage0 update
+@[app_unexpander Unit.unit] meta def unexpandUnit' : Lean.PrettyPrinter.Unexpander
+  | `($(_)) => `(())
+@[app_unexpander Prod.mk] meta def unexpandProdMk' : Lean.PrettyPrinter.Unexpander
+  | `($(_) $x ($y, $ys,*)) => `(($x, $y, $ys,*))
+  | `($(_) $x $y)          => `(($x, $y))
+  | _                      => throw ()
 notation "unitTest " x => Prod.mk x ()

 #check unitTest 42
--- a/tests/lean/run/Reparen.lean
+++ b/tests/lean/run/Reparen.lean
@ -4,6 +4,9 @@ import Lean.Parser
 open Lean
 open Std.Format open Std

+-- TODO(kmill): re-enable after stage0 update
+#exit
+
 def unparenAux (parens body : Syntax) : Syntax :=
 match parens.getHeadInfo, body.getHeadInfo, body.getTailInfo, parens.getTailInfo with
 | SourceInfo.original lead _ _ _, SourceInfo.original _ pos trail pos',