feat: add unicode(...) parser syntax and pp.unicode option (#10373)

This PR adds a `pp.unicode` option and a `unicode("→", "->")` syntax
description alias for the lower-level `unicodeSymbol "→" "->"` parser.
The syntax is added to the `notation` command as well. When `pp.unicode`
is true (the default) then the first form is used when pretty printing,
and otherwise the second ASCII form is used. A variant, `unicode("→",
"->", preserveForPP)` causes the `->` form to be preferred; delaborators
can insert `→` directly into the syntax, which will be pretty printed
as-is; this allows notations like `fun` to use custom options such as
`pp.unicode.fun` to opt into the unicode form when pretty printing.

Additionally:
- Adds more documentation for the `symbol` and `nonReservedSymbol`
parser descriptions.
- Adds documentation for the
`infix`/`infixr`/`infixl`/`prefix`/`postfix` commands.
- The parenthesizers for symbols are improved to backtrack if the atom
doesn't match.
- Fixes a bug where `&"..."` symbols aren't validated.

This is partial progress for issue #1056. What remains is enabling
`unicode(...)` for mixfix commands and then making use of it for core
notation.
This commit is contained in:
Kyle Miller 2025-09-13 21:40:03 -07:00 committed by GitHub
parent ed5dc328d9
commit 3e4fa12c72
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 275 additions and 56 deletions

View file

@ -5125,12 +5125,32 @@ inductive ParserDescr where
The precedence `prec` and `lhsPrec` are used to determine whether the parser
should apply. -/
| trailingNode (kind : SyntaxNodeKind) (prec lhsPrec : Nat) (p : ParserDescr)
/-- A literal symbol parser: parses `val` as a literal.
This parser does not work on identifiers, so `symbol` arguments are declared
as "keywords" and cannot be used as identifiers anywhere in the file. -/
/--
Parses the literal symbol.
The symbol is automatically included in the set of reserved tokens ("keywords").
Keywords cannot be used as identifiers, unless the identifier is otherwise escaped.
For example, `"fun"` reserves `fun` as a keyword; to refer an identifier named `fun` one can write `«fun»`.
Adding a `&` prefix prevents it from being reserved, for example `&"true"`.
Whitespace before or after the atom is used as a pretty printing hint.
For example, `" + "` parses `+` and pretty prints it with whitespace on both sides.
The whitespace has no effect on parsing behavior.
-/
| symbol (val : String)
/-- Like `symbol`, but without reserving `val` as a keyword.
If `includeIdent` is true then `ident` will be reinterpreted as `atom` if it matches. -/
/--
Parses a literal symbol. The `&` prefix prevents it from being included in the set of reserved tokens ("keywords").
This means that the symbol can still be recognized as an identifier by other parsers.
Some syntax categories, such as `tactic`, automatically apply `&` to the first symbol.
Whitespace before or after the atom is used as a pretty printing hint.
For example, `" + "` parses `+` and pretty prints it with whitespace on both sides.
The whitespace has no effect on parsing behavior.
(Not exposed by parser description syntax:
If the `includeIdent` argument is true, lets `ident` be reinterpreted as `atom` if it matches.)
-/
| nonReservedSymbol (val : String) (includeIdent : Bool)
/-- Parses using the category parser `catName` with right binding power
(i.e. precedence) `rbp`. -/
@ -5150,6 +5170,19 @@ inductive ParserDescr where
/-- `sepBy1` is just like `sepBy`, except it takes 1 or more instead of
0 or more occurrences of `p`. -/
| sepBy1 (p : ParserDescr) (sep : String) (psep : ParserDescr) (allowTrailingSep : Bool := false)
/--
- `unicode("→", "->")` parses a symbol matching either `→` or `->`. Each symbol is reserved.
The second symbol is an ASCII version of the first.
The `pp.unicode` option controls which is used when pretty printing.
- `unicode("→", "->", preserveForPP)` is the same except for pretty printing behavior.
When the `pp.unicode` option is enabled, then the pretty printer uses whichever symbol
matches the underlying atom in the syntax.
The intent is that `preserveForPP` means that the ASCII variant is preferred.
For example, `fun` notation uses `preserveForPP` for its arrow; the delaborator chooses
`↦` or `=>` depending on the value of `pp.unicode.fun`, letting users opt-in to formatting with `↦`.
Note that `notation` creates a pretty printer preferring the ASCII version.
-/
| unicodeSymbol (val asciiVal : String) (preserveForPP : Bool)
instance : Inhabited ParserDescr where
default := ParserDescr.symbol ""

View file

@ -424,6 +424,8 @@ private partial def containsAtom (e : Expr) (atom : String) : MetaM Bool := do
| (``ParserDescr.unary, #[.app _ (.lit (.strVal _)), p]) => containsAtom p atom
| (``ParserDescr.binary, #[.app _ (.lit (.strVal "andthen")), p, _]) => containsAtom p atom
| (``ParserDescr.nonReservedSymbol, #[.lit (.strVal tk), _]) => pure (tk.trim == atom)
| (``ParserDescr.unicodeSymbol, #[.lit (.strVal tk), .lit (.strVal asciiTk), _]) =>
pure (tk.trim == atom || asciiTk.trim == atom)
| (``ParserDescr.symbol, #[.lit (.strVal tk)]) => pure (tk.trim == atom)
| (``Parser.withAntiquot, #[_, p]) => containsAtom p atom
| (``Parser.leadingNode, #[_, _, p]) => containsAtom p atom
@ -432,6 +434,8 @@ private partial def containsAtom (e : Expr) (atom : String) : MetaM Bool := do
| (``Parser.nonReservedSymbol, #[.lit (.strVal tk), _]) => pure (tk.trim == atom)
| (``Parser.symbol, #[.lit (.strVal tk)]) => pure (tk.trim == atom)
| (``Parser.symbol, #[_nonlit]) => pure false
| (``Parser.unicodeSymbol, #[.lit (.strVal tk), .lit (.strVal asciiTk), _]) =>
pure (tk.trim == atom || asciiTk.trim == atom)
| (``Parser.withCache, #[_, p]) => containsAtom p atom
| _ => if tryWhnf then attempt (← Meta.whnf p) false else pure false
attempt e true

View file

@ -15,18 +15,18 @@ namespace Lean.Elab.Command
@[builtin_macro Lean.Parser.Command.mixfix] def expandMixfix : Macro := fun stx =>
withAttrKindGlobal stx fun stx => do
match stx with
| `($[$doc?:docComment]? $[@[$attrs?,*]]? infixl:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) =>
| `($[$doc?:docComment]? $[@[$attrs?,*]]? infixl:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) =>
let prec1 := quote <| (← evalPrec prec) + 1
`($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? lhs:$prec $op:str rhs:$prec1 => $f lhs rhs)
| `($[$doc?:docComment]? $[@[$attrs?,*]]? infix:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) =>
| `($[$doc?:docComment]? $[@[$attrs?,*]]? infix:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) =>
let prec1 := quote <| (← evalPrec prec) + 1
`($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? lhs:$prec1 $op:str rhs:$prec1 => $f lhs rhs)
| `($[$doc?:docComment]? $[@[$attrs?,*]]? infixr:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) =>
| `($[$doc?:docComment]? $[@[$attrs?,*]]? infixr:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) =>
let prec1 := quote <| (← evalPrec prec) + 1
`($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? lhs:$prec1 $op:str rhs:$prec => $f lhs rhs)
| `($[$doc?:docComment]? $[@[$attrs?,*]]? prefix:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) =>
| `($[$doc?:docComment]? $[@[$attrs?,*]]? prefix:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) =>
`($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str arg:$prec => $f arg)
| `($[$doc?:docComment]? $[@[$attrs?,*]]? postfix:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) =>
| `($[$doc?:docComment]? $[@[$attrs?,*]]? postfix:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) =>
`($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? arg:$prec $op:str => $f arg)
| _ => Macro.throwUnsupported
where

View file

@ -46,15 +46,30 @@ private partial def antiquote (vars : Array Syntax) : Syntax → Syntax
def expandNotationItemIntoSyntaxItem : TSyntax ``notationItem → MacroM (TSyntax `stx)
| `(notationItem| $_:ident$[:$prec?]?) => `(stx| term $[:$prec?]?)
| `(notationItem| $s:str) => `(stx| $s:str)
| _ => Macro.throwUnsupported
-- TODO(kmill): use after stage0 update
-- | `(notationItem| $u:unicodeAtom) => `(stx| $u:unicodeAtom)
-- | _ => Macro.throwUnsupported
| stx =>
if stx.raw.isOfKind ``Parser.Syntax.unicodeAtom then
return ⟨stx.raw⟩
else
Macro.throwUnsupported
/-- Convert `notation` command lhs item into a pattern element -/
def expandNotationItemIntoPattern (stx : Syntax) : MacroM Syntax :=
let k := stx.getKind
if k == `Lean.Parser.Command.identPrec then
if k == ``Lean.Parser.Command.identPrec then
return mkAntiquotNode stx[0] (kind := `term) (isPseudoKind := true)
else if k == strLitKind then
strLitToPattern stx
else if k == ``Parser.Syntax.unicodeAtom then
let preserveForPP := !stx[4].isNone
if preserveForPP then
-- Use the ascii atom for the pattern; that way delaboration gives the ASCII version,
-- which is the preferred form according to `ParserDescr.unicodeSymbol`.
strLitToPattern stx[3]
else
strLitToPattern stx[1]
else
Macro.throwUnsupported

View file

@ -136,6 +136,8 @@ where
processAtom stx
else if kind == ``Lean.Parser.Syntax.nonReserved then
processNonReserved stx
else if kind == ``Lean.Parser.Syntax.unicodeAtom then
processUnicode stx
else
let stxNew? ← liftM (liftMacroM (expandMacro? stx) : TermElabM _)
match stxNew? with
@ -248,23 +250,30 @@ where
!s.front.isDigit &&
!(s.any Char.isWhitespace)
validAtom (stx : Syntax) : ToParserDescrM String := do
let some atom := stx.isStrLit? | throwUnsupportedSyntax
unless isValidAtom atom do
throwErrorAt stx "invalid atom"
return atom
processAtom (stx : Syntax) := do
match stx[0].isStrLit? with
| some atom =>
unless isValidAtom atom do
throwErrorAt stx "invalid atom"
/- For syntax categories where initialized with `LeadingIdentBehavior` different from default (e.g., `tactic`), we automatically mark
the first symbol as nonReserved. -/
if (← read).behavior != Parser.LeadingIdentBehavior.default && (← read).first then
return (← `(ParserDescr.nonReservedSymbol $(quote atom) false), 1)
else
return (← `(ParserDescr.symbol $(quote atom)), 1)
| none => throwUnsupportedSyntax
let atom ← validAtom stx[0]
/- For syntax categories where initialized with `LeadingIdentBehavior` different from default (e.g., `tactic`), we automatically mark
the first symbol as nonReserved. -/
if (← read).behavior != Parser.LeadingIdentBehavior.default && (← read).first then
return (← `(ParserDescr.nonReservedSymbol $(quote atom) false), 1)
else
return (← `(ParserDescr.symbol $(quote atom)), 1)
processNonReserved (stx : Syntax) := do
let some atom := stx[1].isStrLit? | throwUnsupportedSyntax
let atom ← validAtom stx[1]
return (← `((with_annotate_term $(stx[0]) @ParserDescr.nonReservedSymbol) $(quote atom) false), 1)
processUnicode (stx : Syntax) := do
let atom ← validAtom stx[1]
let asciiAtom ← validAtom stx[3]
let preserveForPP := !stx[4].isNone
return (← `((with_annotate_term $(stx[0]) @ParserDescr.unicodeSymbol) $(quote atom) $(quote asciiAtom) $(quote preserveForPP)), 1)
end Term

View file

@ -92,6 +92,7 @@ unsafe def interpretParserDescr : ParserDescr → CoreM Parenthesizer
| ParserDescr.trailingNode k prec lhsPrec d => return trailingNode.parenthesizer k prec lhsPrec (← interpretParserDescr d)
| ParserDescr.symbol tk => return symbol.parenthesizer tk
| ParserDescr.nonReservedSymbol tk includeIdent => return nonReservedSymbol.parenthesizer tk includeIdent
| ParserDescr.unicodeSymbol tk asciiTk preserve => return unicodeSymbol.parenthesizer tk asciiTk preserve
| ParserDescr.parser constName => combinatorParenthesizerAttribute.runDeclFor constName
| ParserDescr.cat catName prec => return categoryParser.parenthesizer catName prec
@ -124,6 +125,7 @@ unsafe def interpretParserDescr : ParserDescr → CoreM Formatter
| ParserDescr.trailingNode k prec lhsPrec d => return trailingNode.formatter k prec lhsPrec (← interpretParserDescr d)
| ParserDescr.symbol tk => return symbol.formatter tk
| ParserDescr.nonReservedSymbol tk _ => return nonReservedSymbol.formatter tk
| ParserDescr.unicodeSymbol tk asciiTk preserve => return unicodeSymbol.formatter tk asciiTk preserve
| ParserDescr.parser constName => combinatorFormatterAttribute.runDeclFor constName
| ParserDescr.cat catName _ => return categoryParser.formatter catName

View file

@ -1229,7 +1229,8 @@ def unicodeSymbolInfo (sym asciiSym : String) : ParserInfo := {
def unicodeSymbolFn (sym asciiSym : String) : ParserFn :=
unicodeSymbolFnAux sym asciiSym ["'" ++ sym ++ "', '" ++ asciiSym ++ "'"]
def unicodeSymbolNoAntiquot (sym asciiSym : String) : Parser :=
set_option linter.unusedVariables false in
def unicodeSymbolNoAntiquot (sym asciiSym : String) (preserveForPP : Bool) : Parser :=
let sym := sym.trim
let asciiSym := asciiSym.trim
{ info := unicodeSymbolInfo sym asciiSym
@ -1786,8 +1787,19 @@ instance : Coe String Parser where
def nonReservedSymbol (sym : String) (includeIdent := false) : Parser :=
tokenWithAntiquot (nonReservedSymbolNoAntiquot sym includeIdent)
def unicodeSymbol (sym asciiSym : String) : Parser :=
tokenWithAntiquot (unicodeSymbolNoAntiquot sym asciiSym)
/--
`unicodeSymbol sym asciiSym` parses either `sym` or `asciiSym` as a reserved symbol.
The `asciiSym` argument should be an ASCII alternative for `sym`.
- If the `pp.unicode` option is false, then pretty prints using `asciiSym`.
- If the `pp.unicode` option is true, then pretty prints as `sym`,
unless the `preserveForPP` argument is true, in which case the underlying atom
is used to decide whether to print using the unicode or ASCII form.
We take the ASCII form to be the preferred form in this case.
For example, `pp.unicode.fun` causes the delaborator to use `↦` instead of `=>` for the `fun` arrow.
-/
def unicodeSymbol (sym asciiSym : String) (preserveForPP : Bool := false) : Parser :=
tokenWithAntiquot (unicodeSymbolNoAntiquot sym asciiSym preserveForPP)
/--
Define parser for `$e` (if `anonymous == true`) and `$e:name`.

View file

@ -291,6 +291,7 @@ partial def compileParserDescr (categories : ParserCategories) (d : ParserDescr)
| ParserDescr.trailingNode k prec lhsPrec d => return trailingNode k prec lhsPrec (← visit d)
| ParserDescr.symbol tk => return symbol tk
| ParserDescr.nonReservedSymbol tk includeIdent => return nonReservedSymbol tk includeIdent
| ParserDescr.unicodeSymbol tk asciiTk preserve => return unicodeSymbol tk asciiTk preserve
| ParserDescr.parser constName => do
let (_, p) ← mkParserOfConstantAux constName visit;
pure p

View file

@ -48,10 +48,15 @@ namespace Syntax
@[builtin_syntax_parser] def sepBy1 := leading_parser
"sepBy1(" >> withoutPosition (many1 syntaxParser >> ", " >> strLit >>
optional (", " >> many1 syntaxParser) >> optional (", " >> nonReservedSymbol "allowTrailingSep")) >> ")"
@[builtin_syntax_parser] def atom := leading_parser
@[builtin_syntax_parser, inherit_doc ParserDescr.symbol]
def atom := leading_parser
strLit
@[builtin_syntax_parser] def nonReserved := leading_parser
@[builtin_syntax_parser, inherit_doc ParserDescr.nonReservedSymbol]
def nonReserved := leading_parser
"&" >> strLit
@[builtin_syntax_parser, inherit_doc ParserDescr.unicodeSymbol]
def unicodeAtom := leading_parser
"unicode(" >> strLit >> ", " >> strLit >> optional (", " >> nonReservedSymbol "preserveForPP") >> ")"
end Syntax
@ -61,25 +66,39 @@ def namedName := leading_parser
atomic (" (" >> nonReservedSymbol "name") >> " := " >> ident >> ")"
def optNamedName := optional namedName
def identPrec := leading_parser ident >> optPrecedence
def notationItem := withAntiquot (mkAntiquot "notationItem" decl_name%) (strLit <|> Syntax.unicodeAtom <|> identPrec)
/--
`prefix:prec "op" => f` is equivalent to `notation:prec "op" x:prec => f x`.
-/
def «prefix» := leading_parser "prefix"
/--
`infix:prec "op" => f` is equivalent to `notation:prec x:prec1 "op" y:prec1 => f x y`, where `prec1 := prec + 1`.
-/
def «infix» := leading_parser "infix"
/--
`infixl:prec "op" => f` is equivalent to `notation:prec x:prec "op" y:prec1 => f x y`, where `prec1 := prec + 1`.
-/
def «infixl» := leading_parser "infixl"
/--
`infixr:prec "op" => f` is equivalent to `notation:prec x:prec1 "op" y:prec => f x y`, where `prec1 := prec + 1`.
-/
def «infixr» := leading_parser "infixr"
/--
`postfix:prec "op" => f` is equivalent to `notation:prec x:prec "op" => f x`.
-/
def «postfix» := leading_parser "postfix"
def mixfixKind := «prefix» <|> «infix» <|> «infixl» <|> «infixr» <|> «postfix»
@[builtin_command_parser] def «mixfix» := leading_parser
optional docComment >> optional Term.«attributes» >> Term.attrKind >> mixfixKind >>
precedence >> optNamedName >> optNamedPrio >> ppSpace >> strLit >> darrow >> termParser
-- NOTE: We use `suppressInsideQuot` in the following parsers because quotations inside them are evaluated in the same stage and
-- thus should be ignored when we use `checkInsideQuot` to prepare the next stage for a builtin syntax change
def identPrec := leading_parser ident >> optPrecedence
def optKind : Parser := optional (" (" >> nonReservedSymbol "kind" >> ":=" >> ident >> ")")
def notationItem := ppSpace >> withAntiquot (mkAntiquot "notationItem" decl_name%) (strLit <|> identPrec)
precedence >> optNamedName >> optNamedPrio >> ppSpace >> notationItem >> darrow >> termParser
@[builtin_command_parser] def «notation» := leading_parser
optional docComment >> optional Term.«attributes» >> Term.attrKind >>
"notation" >> optPrecedence >> optNamedName >> optNamedPrio >> many notationItem >> darrow >> termParser
"notation" >> optPrecedence >> optNamedName >> optNamedPrio >> many (ppSpace >> notationItem) >> darrow >> termParser
-- NOTE: We use `suppressInsideQuot` in the following parsers because quotations inside them are evaluated in the same stage and
-- thus should be ignored when we use `checkInsideQuot` to prepare the next stage for a builtin syntax change
def optKind : Parser := optional (" (" >> nonReservedSymbol "kind" >> ":=" >> ident >> ")")
@[builtin_command_parser] def «macro_rules» := suppressInsideQuot <| leading_parser
optional docComment >> optional Term.«attributes» >> Term.attrKind >>
"macro_rules" >> optKind >> Term.matchAlts

View file

@ -548,9 +548,9 @@ def funBinder : Parser :=
-- NOTE: we disable anonymous antiquotations to ensure that `fun $b => ...`
-- remains a `term` antiquotation
def basicFun : Parser := leading_parser (withAnonymousAntiquot := false)
ppGroup (many1 (ppSpace >> funBinder) >> optType >> unicodeSymbol " ↦" " =>") >> ppSpace >> termParser
ppGroup (many1 (ppSpace >> funBinder) >> optType >> unicodeSymbol " ↦" " =>" (preserveForPP := true)) >> ppSpace >> termParser
@[builtin_term_parser] def «fun» := leading_parser:maxPrec
ppAllowUngrouped >> unicodeSymbol "λ" "fun" >> (basicFun <|> matchAlts)
ppAllowUngrouped >> unicodeSymbol "λ" "fun" (preserveForPP := true) >> (basicFun <|> matchAlts)
def optExprPrecedence := optional (atomic ":" >> termParser maxPrec)
def withAnonymousAntiquot := leading_parser

View file

@ -141,6 +141,7 @@ def compileEmbeddedParsers : ParserDescr → MetaM Unit
| ParserDescr.trailingNode _ _ _ d => compileEmbeddedParsers d
| ParserDescr.symbol _ => pure ()
| ParserDescr.nonReservedSymbol _ _ => pure ()
| ParserDescr.unicodeSymbol _ _ _ => pure ()
| ParserDescr.cat _ _ => pure ()
/-- Precondition: `α` must match `ctx.tyName`. -/

View file

@ -33,10 +33,15 @@ register_builtin_option pp.parens : Bool := {
group := "pp"
descr := "(pretty printer) if set to true, notation is wrapped in parentheses regardless of precedence"
}
register_builtin_option pp.unicode : Bool := {
defValue := true
group := "pp"
descr := "(pretty printer) if set to false, avoid using non-unicode symbols when formatting"
}
register_builtin_option pp.unicode.fun : Bool := {
defValue := false
group := "pp"
descr := "(pretty printer) disable/enable unicode ↦ notation for functions"
descr := "(pretty printer) if set to true, use unicode `↦` notation for functions"
}
register_builtin_option pp.match : Bool := {
defValue := true
@ -280,7 +285,8 @@ def getPPExplicit (o : Options) : Bool := o.get pp.explicit.name (getPPAll o)
def getPPForalls (o : Options) : Bool := o.get pp.foralls.name pp.foralls.defValue
def getPPNotation (o : Options) : Bool := o.get pp.notation.name (!getPPAll o)
def getPPParens (o : Options) : Bool := o.get pp.parens.name pp.parens.defValue
def getPPUnicodeFun (o : Options) : Bool := o.get pp.unicode.fun.name false
def getPPUnicode (o : Options) : Bool := o.get pp.unicode.name pp.unicode.defValue
def getPPUnicodeFun (o : Options) : Bool := o.get pp.unicode.fun.name pp.unicode.fun.defValue
def getPPMatch (o : Options) : Bool := o.get pp.match.name (!getPPAll o)
def getPPSorrySource (o : Options) : Bool := o.get pp.sorrySource.name pp.sorrySource.defValue
def getPPFieldNotation (o : Options) : Bool := o.get pp.fieldNotation.name (!getPPAll o)

View file

@ -12,6 +12,7 @@ public import Lean.Parser.StrInterpolation
public import Lean.KeyedDeclsAttribute
public import Lean.ParserCompiler.Attribute
public import Lean.PrettyPrinter.Basic
public import Lean.PrettyPrinter.Delaborator.Options
public section
@ -437,11 +438,11 @@ def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit
@[combinator_formatter symbolNoAntiquot, expose]
def symbolNoAntiquot.formatter (sym : String) : Formatter := do
let stx ← getCur
if stx.isToken sym then do
let (Syntax.atom info _) ← pure stx | unreachable!
withMaybeTag (getExprPos? stx) (pushToken info sym false)
if stx.isToken sym then
let (Syntax.atom info _) := stx | unreachable!
withMaybeTag (getExprPos? stx) <| pushToken info sym false
goLeft
else do
else
trace[PrettyPrinter.format.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}'"
throwBacktrack
@ -453,14 +454,21 @@ def symbolNoAntiquot.formatter (sym : String) : Formatter := do
symbolNoAntiquot.formatter ch.toString
@[combinator_formatter unicodeSymbolNoAntiquot, expose]
def unicodeSymbolNoAntiquot.formatter (sym asciiSym : String) : Formatter := do
let Syntax.atom info val ← getCur
| throwError m!"not an atom: {← getCur}"
if val == sym.trim then
pushToken info sym false
def unicodeSymbolNoAntiquot.formatter (sym asciiSym : String) (preserveForPP : Bool) : Formatter := do
let stx ← getCur
let usesUnicode := stx.isToken sym
let usesAscii := stx.isToken asciiSym
if usesUnicode || usesAscii then
let (Syntax.atom info _) := stx | unreachable!
-- Use unicode version if pp.unicode is enabled and either preserveForPP is false or the syntax contains the unicode version
if getPPUnicode (← getOptions) && (!preserveForPP || usesUnicode) then
withMaybeTag (getExprPos? stx) <| pushToken info sym false
else
withMaybeTag (getExprPos? stx) <| pushToken info asciiSym false
goLeft
else
pushToken info asciiSym false
goLeft
trace[PrettyPrinter.format.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}' or '{asciiSym}'"
throwBacktrack
@[combinator_formatter identNoAntiquot, expose]
def identNoAntiquot.formatter : Formatter := do

View file

@ -469,13 +469,25 @@ def trailingNode.parenthesizer (k : SyntaxNodeKind) (prec lhsPrec : Nat) (p : Pa
@[combinator_parenthesizer rawCh, expose] def rawCh.parenthesizer (_ch : Char) := visitToken
@[combinator_parenthesizer symbolNoAntiquot, expose] def symbolNoAntiquot.parenthesizer (_sym : String) := visitToken
@[combinator_parenthesizer unicodeSymbolNoAntiquot, expose] def unicodeSymbolNoAntiquot.parenthesizer (_sym _asciiSym : String) := visitToken
@[combinator_parenthesizer symbolNoAntiquot, expose] def symbolNoAntiquot.parenthesizer (sym : String) := do
let stx ← getCur
if stx.isToken sym then
visitToken
else
trace[PrettyPrinter.parenthesize.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}'"
throwBacktrack
@[combinator_parenthesizer nonReservedSymbolNoAntiquot, expose] def nonReservedSymbolNoAntiquot.parenthesizer := symbolNoAntiquot.parenthesizer
@[combinator_parenthesizer unicodeSymbolNoAntiquot, expose] def unicodeSymbolNoAntiquot.parenthesizer (sym asciiSym : String) (_preserveForPP : Bool) := do
let stx ← getCur
if stx.isToken sym || stx.isToken asciiSym then
visitToken
else
trace[PrettyPrinter.parenthesize.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}' or '{asciiSym}'"
throwBacktrack
@[combinator_parenthesizer identNoAntiquot, expose] def identNoAntiquot.parenthesizer := do checkKind identKind; visitToken
@[combinator_parenthesizer rawIdentNoAntiquot, expose] def rawIdentNoAntiquot.parenthesizer := visitToken
@[combinator_parenthesizer identEq, expose] def identEq.parenthesizer (_id : Name) := visitToken
@[combinator_parenthesizer nonReservedSymbolNoAntiquot, expose] def nonReservedSymbolNoAntiquot.parenthesizer (_sym : String) (_includeIdent : Bool) := visitToken
@[combinator_parenthesizer charLitNoAntiquot, expose] def charLitNoAntiquot.parenthesizer := visitToken
@[combinator_parenthesizer strLitNoAntiquot, expose] def strLitNoAntiquot.parenthesizer := visitToken

View file

@ -6,3 +6,12 @@ syntax " 0" : term
syntax " 'a'" : term
syntax " `a" : term
syntax " \"a" : term
syntax &"0" : term
syntax &"'a'" : term
syntax &"`a" : term
syntax &"\"a" : term
syntax &" 0" : term
syntax &" 'a'" : term
syntax &" `a" : term
syntax &" \"a" : term

View file

@ -6,3 +6,11 @@
242.lean:6:7-6:13: error: invalid atom
242.lean:7:7-7:12: error: invalid atom
242.lean:8:7-8:13: error: invalid atom
242.lean:10:8-10:11: error: invalid atom
242.lean:11:8-11:13: error: invalid atom
242.lean:12:8-12:12: error: invalid atom
242.lean:13:8-13:13: error: invalid atom
242.lean:14:8-14:12: error: invalid atom
242.lean:15:8-15:14: error: invalid atom
242.lean:16:8-16:13: error: invalid atom
242.lean:17:8-17:14: error: invalid atom

View file

@ -0,0 +1,80 @@
import Lean
/-!
# Tests of `pp.unicode` pretty printer option
-/
open Lean PrettyPrinter
/-!
Testing a notation with a `unicode` operator.
Respects the current setting of `pp.unicode` exactly.
-/
def And' := And
notation:35 x:36 unicode(" ∧' ", " /\\' ") y:35 => And' x y
/-- info: True ∧' False : Prop -/
#guard_msgs in #check True ∧' False
/-- info: True /\' False : Prop -/
#guard_msgs in set_option pp.unicode false in #check True ∧' False
/-- info: True✝ ∧' False✝ -/
#guard_msgs in #eval do ppTerm (← `(True ∧' False))
/-- info: True✝ ∧' False✝ -/
#guard_msgs in #eval do ppTerm (← `(True /\' False))
/-- info: True✝ /\' False✝ -/
#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True ∧' False))
/-- info: True✝ /\' False✝ -/
#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True /\' False))
/-!
Testing a notation with a `unicode` operator with `preserveForPP`.
Respects the current setting of `pp.unicode` *if* the underlying atom is in the unicode form.
The `notation` command creates a pretty printer using the ASCII form,
so this is only possible if a delaborator creates a syntax quotation with the unicode form.
-/
def And'' := And
notation:35 x:36 unicode(" ∧'' ", " /\\'' ", preserveForPP) y:35 => And'' x y
/-- info: True /\'' False : Prop -/
#guard_msgs in #check True ∧'' False
/-- info: True /\'' False : Prop -/
#guard_msgs in set_option pp.unicode false in #check True ∧'' False
/-- info: True✝ ∧'' False✝ -/
#guard_msgs in #eval do ppTerm (← `(True ∧'' False))
/-- info: True✝ /\'' False✝ -/
#guard_msgs in #eval do ppTerm (← `(True /\'' False))
/-- info: True✝ /\'' False✝ -/
#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True ∧'' False))
/-- info: True✝ /\'' False✝ -/
#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True /\'' False))
/-!
The `fun` notation uses `preserveForPP`.
Whne `pp.unicode.fun` is true, its elaborator uses `↦`.
-/
/-- info: fun x => x : ?m.1 → ?m.1 -/
#guard_msgs in #check fun x => x
/-- info: fun x ↦ x : ?m.1 → ?m.1 -/
#guard_msgs in set_option pp.unicode.fun true in #check fun x => x
/-- info: fun x => x : ?m.1 -> ?m.1 -/
#guard_msgs in set_option pp.unicode.fun true in set_option pp.unicode false in #check fun x => x
-- The delaborator is what uses `↦`, so the option has no effect here.
/-- info: fun x✝ => x✝ -/
#guard_msgs in set_option pp.unicode.fun true in #eval do ppTerm (← `(fun x => x))
/-- info: fun x✝ => x✝ -/
#guard_msgs in #eval do ppTerm (← `(fun x => x))
/-- info: fun x✝ ↦ x✝ -/
#guard_msgs in #eval do ppTerm (← `(fun x ↦ x))
/-- info: fun x✝ => x✝ -/
#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(fun x ↦ x))