feat: make cdot expansion take hygiene into account (#9443)

This PR makes cdot function expansion take hygiene information into
account, fixing "parenthesis capturing" errors that can make erroneous
cdots trigger cdot expansion in conjunction with macros. For example,
given
```lean
macro "baz% " t:term : term => `(1 + ($t))
```
it used to be that `baz% ·` would expand to `1 + fun x => x`, but now
the parentheses in `($t)` do not capture the cdot. We also fix an
oversight where cdot function expansion ignored the fact that type
ascriptions and tuples were supposed to delimit expansion, and also now
the quotation prechecker ignores the identifier in `hygieneInfo`. (#9491
added the hygiene information to the parenthesis and cdot syntaxes.)

This fixes a bug discovered by [Google
DeepMind](https://storage.googleapis.com/deepmind-media/DeepMind.com/Blog/imo-2024-solutions/P1/index.html),
which made use of `useλy . x=>y.rec λS p=>?_`. The `use` tactic from
Mathlib wrapped the provided term in a type ascription, and so this was
equivalent to `use fun x => λy x x=>y.rec λS p=>?_`. (Note that cdot
function expansion is not able to take into account *where* the cdots
are located, and it is syntactically valid to insert an identifier into
the binder list like this. If we ever want to address this in the
future, we could have cdots expand into a special term that wraps an
identifier that evaluates to a local, but which would cause errors in
other contexts.)

Design note: we put the `hygieneInfo` on the open parenthesis rather
than at the end, since that way the hygiene information is available
even when there are parsing errors. This is important since we rely on
being able to elaborate partial syntax to get elab info (e.g. in `(a.`
to get completion info). Note that syntax matchers check that the
`hygieneInfo` is actually present, so such partial syntax would not be
matched.
This commit is contained in:
Kyle Miller 2025-07-23 17:43:32 -07:00 committed by GitHub
parent 8a0d036e82
commit d45cc674ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 142 additions and 83 deletions

View file

@ -168,7 +168,7 @@ theorem max?_le_iff [Max α] [LE α]
-- This could be refactored by designing appropriate typeclasses to replace `le_refl`, `max_eq_or`,
-- and `le_min_iff`.
theorem max?_eq_some_iff [Max α] [LE α] [anti : Std.Antisymm ((· : α) ≤ ·)]
theorem max?_eq_some_iff [Max α] [LE α] [anti : Std.Antisymm (· ≤ · : αα → Prop)]
(le_refl : ∀ a : α, a ≤ a)
(max_eq_or : ∀ a b : α, max a b = a max a b = b)
(max_le_iff : ∀ a b c : α, max b c ≤ a ↔ b ≤ a ∧ c ≤ a) {xs : List α} :

View file

@ -254,64 +254,96 @@ partial def mkMPairs (elems : Array Term) : MacroM Term :=
pure acc
loop (elems.size - 1) elems.back!
section
open Parser
open Parser in
partial def hasCDot : Syntax → Bool
| Syntax.node _ k args =>
if k == ``Term.paren || k == ``Term.typeAscription || k == ``Term.tuple then false
else if k == ``Term.cdot then true
else args.any hasCDot
def isCDotBinderKind (k : SyntaxNodeKind) : Bool :=
k == ``Term.paren || k == ``Term.typeAscription || k == ``Term.tuple
/--
Returns whether or not this cdot is for the given `HygieneInfo` name;
if no hygiene info is given, then matches any cdot, no matter its hygieneInfo.
-/
def isCDotForInfo (s : Syntax) (hygieneInfo? : Option Name) : Bool :=
match s with
| `(· $h:hygieneInfo) =>
if let some info := hygieneInfo? then
h.getHygieneInfo == info
else
true
| _ => false
/--
Return `some` if succeeded expanding `·` notation occurring in
the given syntax. Otherwise, return `none`.
Examples:
- `· + 1` => `fun x => x + 1`
- `f · · b` => `fun x1 x2 => f x1 x2 b` -/
partial def expandCDot? (stx : Term) : MacroM (Option Term) := do
if hasCDot stx then
Returns true if the given expression contains a cdot for the given `HygieneInfo` name.
If no `HygieneInfo` name is given, then returns true if there is any cdot.
The search is delimited by cdot binders (any syntax satisfying `isCDotBinderKind`).
-/
partial def hasCDot : Syntax → Option Name → Bool
| s@(Syntax.node _ k args), info? =>
if isCDotBinderKind k then false
else if isCDotForInfo s info? then true
else args.any (fun s => hasCDot s info?)
| _, _ => false
end
/--
If the term contains any cdots that match the given `HygieneInfo` name (see `isCDotForInfo`),
then returns `some` with the expanded syntax, otherwise returns `none`.
Examples:
- `· + 1` => `fun x => x + 1`
- `f · · b` => `fun x1 x2 => f x1 x2 b`
-/
partial def expandCDot? (stx : Term) (hygieneInfo? : Option Name) : MacroM (Option Term) := do
if hasCDot stx hygieneInfo? then
withFreshMacroScope do
let mut (newStx, binders) ← (go stx).run #[]
if binders.size == 1 then
-- It is nicer using `x` over `x1` if there's only a single binder.
-- It is nicer using `x` over `x1` if there is only a single binder.
let x1 := binders[0]!
let x := mkIdentFrom x1 (← MonadQuotation.addMacroScope `x) (canonical := true)
let x ← mkVarFrom x1 "x"
binders := binders.set! 0 x
newStx ← newStx.replaceM fun s => pure (if s == x1 then x else none)
`(fun $binders* => $(⟨newStx⟩))
else
pure none
where
mkVarFrom (ref : Syntax) (s : String) : MacroM Ident := do
-- We do not incorporate the hygieneInfo macro scopes into the variable name.
-- We could, but since cdot function binders identifiers are not supposed to cross stages it would not enable anything.
let name ← MonadQuotation.addMacroScope <| Name.mkSimple s
return mkIdentFrom ref name (canonical := true)
/--
Auxiliary function for expanding the `·` notation.
The extra state `Array Syntax` contains the new binder names.
If `stx` is a `·`, we create a fresh identifier, store it in the
extra state, and return it. Otherwise, we just return `stx`.
-/
go : Syntax → StateT (Array Ident) MacroM Syntax
| stx@`($_:hygienicLParen$(_))) => pure stx
| stx@`($_:cdot) => do
let name ← MonadQuotation.addMacroScope <| Name.mkSimple s!"x{(← get).size + 1}"
let id := mkIdentFrom stx name (canonical := true)
modify (fun s => s.push id)
pure id
| stx => match stx with
go (stx : Syntax) : StateT (Array Ident) MacroM Syntax :=
match stx with
| .node _ k args => do
let args ←
if k == choiceKind then
if args.isEmpty then
return stx
let s ← get
let args' ← args.mapM (fun arg => go arg |>.run s)
let s' := args'[0]!.2
unless args'.all (fun (_, s'') => s''.size == s'.size) do
Macro.throwErrorAt stx "Ambiguous notation in cdot function has different numbers of '·' arguments in each alternative."
set s'
pure <| args'.map Prod.fst
else
args.mapM go
return .node (.fromRef stx (canonical := true)) k args
if isCDotForInfo stx hygieneInfo? then
let id ← mkVarFrom stx s!"x{(← get).size + 1}"
modify (fun s => s.push id)
pure id
else if isCDotBinderKind k then
pure stx
else
let args ←
if k == choiceKind then
if args.isEmpty then
return stx
let s ← get
let args' ← args.mapM (fun arg => go arg |>.run s)
let s' := args'[0]!.2
unless args'.all (fun (_, s'') => s''.size == s'.size) do
Macro.throwErrorAt stx "Ambiguous notation in cdot function has different numbers of '·' arguments in each alternative."
set s'
pure <| args'.map Prod.fst
else
args.mapM go
return .node (.fromRef stx (canonical := true)) k args
| _ => pure stx
/--
@ -347,33 +379,33 @@ def elabCDotFunctionAlias? (stx : Term) : TermElabM (Option Expr) := do
where
expandCDotArg? (stx : Term) : MacroM (Option Term) :=
match stx with
| `($_:hygienicLParen$e)) => Term.expandCDot? e
| _ => Term.expandCDot? stx
| `(($h:hygieneInfo $e)) => Term.expandCDot? e h.getHygieneInfo
| _ => Term.expandCDot? stx none
@[builtin_macro Lean.Parser.Term.paren] def expandParen : Macro
| `($_:hygienicLParen$e)) => return (← expandCDot? e).getD e
| `(($h:hygieneInfo $e)) => return (← expandCDot? e h.getHygieneInfo).getD e
| _ => Macro.throwUnsupported
@[builtin_macro Lean.Parser.Term.tuple] def expandTuple : Macro
| `($_:hygienicLParen)) => ``(Unit.unit)
| `($_:hygienicLParen $e, $es,*)) => do
| `(()) => ``(Unit.unit)
| `(($h:hygieneInfo $e, $es,*)) => do
let pairs ← mkPairs (#[e] ++ es)
return (← expandCDot? pairs).getD pairs
return (← expandCDot? pairs h.getHygieneInfo).getD pairs
| _ => Macro.throwUnsupported
@[builtin_macro Lean.Parser.Term.typeAscription] def expandTypeAscription : Macro
| `($_:hygienicLParen$e : $(type)?)) => do
match (← expandCDot? e) with
| `(($h:hygieneInfo $e : $(type)?)) => do
match (← expandCDot? e h.getHygieneInfo) with
| some e => `(($e : $(type)?))
| none => Macro.throwUnsupported
| _ => Macro.throwUnsupported
@[builtin_term_elab typeAscription] def elabTypeAscription : TermElab
| `($_:hygienicLParen$e : $type)), _ => do
| `(($e : $type)), _ => do
let type ← withSynthesize (postpone := .yes) <| elabType type
let e ← elabTerm e type
ensureHasType type e
| `($_:hygienicLParen$e :)), expectedType? => do
| `(($e :)), expectedType? => do
let e ← withSynthesize (postpone := .no) <| elabTerm e none
ensureHasType expectedType? e
| _, _ => throwUnsupportedSyntax

View file

@ -194,8 +194,8 @@ where
| `(unop% $f $arg) => processUnOp s f arg
| `(leftact% $f $lhs $rhs) => processBinOp s .leftact f lhs rhs
| `(rightact% $f $lhs $rhs) => processBinOp s .rightact f lhs rhs
| `($_:hygienicLParen $e)) =>
if hasCDot e then
| `(($h:hygieneInfo $e)) =>
if hasCDot e h.getHygieneInfo then
processLeaf s
else
go e

View file

@ -62,7 +62,7 @@ def removeParenthesesAux (parens body : Syntax) : Syntax :=
partial def removeParentheses (stx : Syntax) : MacroM Syntax := do
match stx with
| `($_:hygienicLParen $e)) => pure $ removeParenthesesAux stx (←removeParentheses $ (←Term.expandCDot? e).getD e)
| `(($h:hygieneInfo $e)) => pure $ removeParenthesesAux stx (← removeParentheses $ (← Term.expandCDot? e h.getHygieneInfo).getD e)
| _ =>
match stx with
| .node info kind args => pure $ .node info kind (←args.mapM removeParentheses)

View file

@ -124,10 +124,10 @@ private def isSectionVariable (e : Expr) : TermElabM Bool := do
| _ => throwUnsupportedSyntax
@[builtin_quot_precheck Lean.Parser.Term.typeAscription] def precheckTypeAscription : Precheck
| `($_:hygienicLParen $e : $type)) => do
| `(($e : $type)) => do
precheck e
precheck type
| `($_:hygienicLParen $e :)) => precheck e
| `(($e :)) => precheck e
| _ => throwUnsupportedSyntax
@[builtin_quot_precheck Lean.Parser.Term.explicit] def precheckExplicit : Precheck

View file

@ -1503,7 +1503,7 @@ private def isLambdaWithImplicit (stx : Syntax) : Bool :=
private partial def dropTermParens : Syntax → Syntax := fun stx =>
match stx with
| `($_:hygienicLParen $stx)) => dropTermParens stx
| `(($stx)) => dropTermParens stx
| _ => stx
private def isHole (stx : Syntax) : Bool :=
@ -1520,9 +1520,7 @@ private def isNoImplicitLambda (stx : Syntax) : Bool :=
| _ => false
private def isTypeAscription (stx : Syntax) : Bool :=
match stx with
| `($_:hygienicLParen $_ : $[$_]?)) => true
| _ => false
stx.isOfKind ``Parser.Term.typeAscription
def hasNoImplicitLambdaAnnotation (type : Expr) : Bool :=
annotation? `noImplicitLambda type |>.isSome

View file

@ -238,20 +238,8 @@ See also the `sorry` tactic, which is short for `exact sorry`.
@[builtin_term_parser] def «sorry» := leading_parser
"sorry"
-- Left parenthesis with hygiene info, for cdot function expansion.
-- This is a pseudokind for bootstrapping purposes.
def hygienicLParen : Parser :=
withAntiquot (mkAntiquot "hygienicLParen" decl_name% (anonymous := false) (isPseudoKind := true)) <|
leadingNode decl_name% (eval_prec max) ("(" >> hygieneInfo)
-- TODO(kmill): remove this formatter after stage0 update
open PrettyPrinter.Formatter Syntax.MonadTraverser in
@[combinator_formatter Lean.Parser.Term.hygienicLParen]
def hygienicLParen.formatter : PrettyPrinter.Formatter := do
let info := (← getCur).getHeadInfo
withMaybeTag info.getPos? (pushToken info "(" false)
goLeft
@[combinator_parenthesizer Lean.Parser.Term.hygienicLParen]
def hygienicLParen.parenthesizer : PrettyPrinter.Parenthesizer := do
PrettyPrinter.Parenthesizer.visitToken
def hygienicLParen : Parser := leading_parser (withAnonymousAntiquot := false)
"(" >> hygieneInfo
/--
A placeholder for an implicit lambda abstraction's variable. The lambda abstraction is scoped to the surrounding parentheses.
For example, `(· + ·)` is equivalent to `fun x y => x + y`. Tuple notation and type ascription notation also serve as scopes.

View file

@ -1,10 +1,3 @@
-- TODO(kmill) remove after stage0 update
@[app_unexpander Unit.unit] meta def unexpandUnit' : Lean.PrettyPrinter.Unexpander
| `($(_)) => `(())
@[app_unexpander Prod.mk] meta def unexpandProdMk' : Lean.PrettyPrinter.Unexpander
| `($(_) $x ($y, $ys,*)) => `(($x, $y, $ys,*))
| `($(_) $x $y) => `(($x, $y))
| _ => throw ()
notation "unitTest " x => Prod.mk x ()
#check unitTest 42

View file

@ -4,9 +4,6 @@ import Lean.Parser
open Lean
open Std.Format open Std
-- TODO(kmill): re-enable after stage0 update
#exit
def unparenAux (parens body : Syntax) : Syntax :=
match parens.getHeadInfo, body.getHeadInfo, body.getTailInfo, parens.getTailInfo with
| SourceInfo.original lead _ _ _, SourceInfo.original _ pos trail pos',
@ -22,6 +19,13 @@ partial def unparen : Syntax → Syntax
| `(level|($stx')) => unparenAux stx $ unparen stx'
| _ => stx.modifyArgs $ Array.map unparen
def clearHygieneInfo (stx : Syntax) : Syntax :=
Id.run <| stx.replaceM fun s => do
if s.isOfKind hygieneInfoKind then
return some <| s.setArg 0 (mkIdent .anonymous)
else
return none
unsafe def main (args : List String) : IO Unit := do
let (debug, f) : Bool × String := match args with
| [f, "-d"] => (true, f)
@ -43,10 +47,10 @@ cmds.forM $ fun cmd => do
def check (stx : Syntax) : CoreM Unit := do
let stx' := unparen stx;
let stx' ← PrettyPrinter.parenthesizeTerm stx';
let stx' ← clearHygieneInfo <$> PrettyPrinter.parenthesizeTerm stx';
let f ← PrettyPrinter.formatTerm stx';
IO.println f;
if (stx != stx') then
if (clearHygieneInfo stx != stx') then
throwError "reparenthesization failed"
open Lean

View file

@ -1,4 +1,8 @@
/-!
# Tests of cdot functions
-/
set_option pp.mvars false
class Inc (α : Type) :=
(inc : αα)
@ -43,3 +47,43 @@ notation "f" x => tag "2" x
/-- info: fun x => (f x).length : String → Nat -/
#guard_msgs in
#check (String.length <| f ·)
/-!
Check that cdots can't be captured in `let` functions
(there is a type ascription the body syntax is inserted into).
-/
/--
error: invalid occurrence of `·` notation, it must be surrounded by parentheses (e.g. `(· + 1)`)
---
error: invalid occurrence of `·` notation, it must be surrounded by parentheses (e.g. `(· + 1)`)
-/
#guard_msgs in
def a : Nat → Nat → Nat :=
let (add) : Nat → Nat → Nat := · + ·
add
/-!
Check that cdots can't be captured in macros.
-/
macro "baz% " t:term : term => `(1 + ($t))
/--
error: invalid occurrence of `·` notation, it must be surrounded by parentheses (e.g. `(· + 1)`)
---
info: 1 + sorry : Nat
-/
#guard_msgs in #check baz% ·
/-!
Note that cdots still work here since they are expanded before the inner macro is expanded.
-/
/-- info: fun x => 1 + x : Nat → Nat -/
#guard_msgs in #check (baz% ·)
/-!
Check that parentheses, type ascriptions, and tuples each delimit cdot expansion.
-/
/-- info: fun x => x ∘ fun x => x : (?_ → ?_) → ?_ → ?_ -/
#guard_msgs in #check (· ∘ (·))
/-- info: fun x => x ∘ fun x => x : (Nat → ?_) → Nat → ?_ -/
#guard_msgs in #check (· ∘ (· : Nat → Nat))
/-- info: fun x => (x, fun x_1 => (1, x_1)) : (x : ?_) → ?_ × (?_ x → Nat × ?_ x) -/
#guard_msgs in #check (·, (1, ·))