diff --git a/src/Init/Prelude.lean b/src/Init/Prelude.lean index c2797be6df..c0f4cf7ab3 100644 --- a/src/Init/Prelude.lean +++ b/src/Init/Prelude.lean @@ -5125,12 +5125,32 @@ inductive ParserDescr where The precedence `prec` and `lhsPrec` are used to determine whether the parser should apply. -/ | trailingNode (kind : SyntaxNodeKind) (prec lhsPrec : Nat) (p : ParserDescr) - /-- A literal symbol parser: parses `val` as a literal. - This parser does not work on identifiers, so `symbol` arguments are declared - as "keywords" and cannot be used as identifiers anywhere in the file. -/ + /-- + Parses the literal symbol. + + The symbol is automatically included in the set of reserved tokens ("keywords"). + Keywords cannot be used as identifiers, unless the identifier is otherwise escaped. + For example, `"fun"` reserves `fun` as a keyword; to refer an identifier named `fun` one can write `«fun»`. + Adding a `&` prefix prevents it from being reserved, for example `&"true"`. + + Whitespace before or after the atom is used as a pretty printing hint. + For example, `" + "` parses `+` and pretty prints it with whitespace on both sides. + The whitespace has no effect on parsing behavior. + -/ | symbol (val : String) - /-- Like `symbol`, but without reserving `val` as a keyword. - If `includeIdent` is true then `ident` will be reinterpreted as `atom` if it matches. -/ + /-- + Parses a literal symbol. The `&` prefix prevents it from being included in the set of reserved tokens ("keywords"). + This means that the symbol can still be recognized as an identifier by other parsers. + + Some syntax categories, such as `tactic`, automatically apply `&` to the first symbol. + + Whitespace before or after the atom is used as a pretty printing hint. + For example, `" + "` parses `+` and pretty prints it with whitespace on both sides. + The whitespace has no effect on parsing behavior. + + (Not exposed by parser description syntax: + If the `includeIdent` argument is true, lets `ident` be reinterpreted as `atom` if it matches.) + -/ | nonReservedSymbol (val : String) (includeIdent : Bool) /-- Parses using the category parser `catName` with right binding power (i.e. precedence) `rbp`. -/ @@ -5150,6 +5170,19 @@ inductive ParserDescr where /-- `sepBy1` is just like `sepBy`, except it takes 1 or more instead of 0 or more occurrences of `p`. -/ | sepBy1 (p : ParserDescr) (sep : String) (psep : ParserDescr) (allowTrailingSep : Bool := false) + /-- + - `unicode("→", "->")` parses a symbol matching either `→` or `->`. Each symbol is reserved. + The second symbol is an ASCII version of the first. + The `pp.unicode` option controls which is used when pretty printing. + - `unicode("→", "->", preserveForPP)` is the same except for pretty printing behavior. + When the `pp.unicode` option is enabled, then the pretty printer uses whichever symbol + matches the underlying atom in the syntax. + The intent is that `preserveForPP` means that the ASCII variant is preferred. + For example, `fun` notation uses `preserveForPP` for its arrow; the delaborator chooses + `↦` or `=>` depending on the value of `pp.unicode.fun`, letting users opt-in to formatting with `↦`. + Note that `notation` creates a pretty printer preferring the ASCII version. + -/ + | unicodeSymbol (val asciiVal : String) (preserveForPP : Bool) instance : Inhabited ParserDescr where default := ParserDescr.symbol "" diff --git a/src/Lean/Elab/DocString/Builtin.lean b/src/Lean/Elab/DocString/Builtin.lean index e356d6ca81..a1d438f08e 100644 --- a/src/Lean/Elab/DocString/Builtin.lean +++ b/src/Lean/Elab/DocString/Builtin.lean @@ -424,6 +424,8 @@ private partial def containsAtom (e : Expr) (atom : String) : MetaM Bool := do | (``ParserDescr.unary, #[.app _ (.lit (.strVal _)), p]) => containsAtom p atom | (``ParserDescr.binary, #[.app _ (.lit (.strVal "andthen")), p, _]) => containsAtom p atom | (``ParserDescr.nonReservedSymbol, #[.lit (.strVal tk), _]) => pure (tk.trim == atom) + | (``ParserDescr.unicodeSymbol, #[.lit (.strVal tk), .lit (.strVal asciiTk), _]) => + pure (tk.trim == atom || asciiTk.trim == atom) | (``ParserDescr.symbol, #[.lit (.strVal tk)]) => pure (tk.trim == atom) | (``Parser.withAntiquot, #[_, p]) => containsAtom p atom | (``Parser.leadingNode, #[_, _, p]) => containsAtom p atom @@ -432,6 +434,8 @@ private partial def containsAtom (e : Expr) (atom : String) : MetaM Bool := do | (``Parser.nonReservedSymbol, #[.lit (.strVal tk), _]) => pure (tk.trim == atom) | (``Parser.symbol, #[.lit (.strVal tk)]) => pure (tk.trim == atom) | (``Parser.symbol, #[_nonlit]) => pure false + | (``Parser.unicodeSymbol, #[.lit (.strVal tk), .lit (.strVal asciiTk), _]) => + pure (tk.trim == atom || asciiTk.trim == atom) | (``Parser.withCache, #[_, p]) => containsAtom p atom | _ => if tryWhnf then attempt (← Meta.whnf p) false else pure false attempt e true diff --git a/src/Lean/Elab/Mixfix.lean b/src/Lean/Elab/Mixfix.lean index 5ba1f5398e..3dec444310 100644 --- a/src/Lean/Elab/Mixfix.lean +++ b/src/Lean/Elab/Mixfix.lean @@ -15,18 +15,18 @@ namespace Lean.Elab.Command @[builtin_macro Lean.Parser.Command.mixfix] def expandMixfix : Macro := fun stx => withAttrKindGlobal stx fun stx => do match stx with - | `($[$doc?:docComment]? $[@[$attrs?,*]]? infixl:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) => + | `($[$doc?:docComment]? $[@[$attrs?,*]]? infixl:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) => let prec1 := quote <| (← evalPrec prec) + 1 `($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? lhs:$prec $op:str rhs:$prec1 => $f lhs rhs) - | `($[$doc?:docComment]? $[@[$attrs?,*]]? infix:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) => + | `($[$doc?:docComment]? $[@[$attrs?,*]]? infix:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) => let prec1 := quote <| (← evalPrec prec) + 1 `($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? lhs:$prec1 $op:str rhs:$prec1 => $f lhs rhs) - | `($[$doc?:docComment]? $[@[$attrs?,*]]? infixr:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) => + | `($[$doc?:docComment]? $[@[$attrs?,*]]? infixr:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) => let prec1 := quote <| (← evalPrec prec) + 1 `($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? lhs:$prec1 $op:str rhs:$prec => $f lhs rhs) - | `($[$doc?:docComment]? $[@[$attrs?,*]]? prefix:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) => + | `($[$doc?:docComment]? $[@[$attrs?,*]]? prefix:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) => `($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str arg:$prec => $f arg) - | `($[$doc?:docComment]? $[@[$attrs?,*]]? postfix:$prec $[(name := $name)]? $[(priority := $prio)]? $op => $f) => + | `($[$doc?:docComment]? $[@[$attrs?,*]]? postfix:$prec $[(name := $name)]? $[(priority := $prio)]? $op:str => $f) => `($[$doc?:docComment]? $[@[$attrs?,*]]? notation:$prec $[(name := $name)]? $[(priority := $prio)]? arg:$prec $op:str => $f arg) | _ => Macro.throwUnsupported where diff --git a/src/Lean/Elab/Notation.lean b/src/Lean/Elab/Notation.lean index ba067c46aa..db320d2dc1 100644 --- a/src/Lean/Elab/Notation.lean +++ b/src/Lean/Elab/Notation.lean @@ -46,15 +46,30 @@ private partial def antiquote (vars : Array Syntax) : Syntax → Syntax def expandNotationItemIntoSyntaxItem : TSyntax ``notationItem → MacroM (TSyntax `stx) | `(notationItem| $_:ident$[:$prec?]?) => `(stx| term $[:$prec?]?) | `(notationItem| $s:str) => `(stx| $s:str) - | _ => Macro.throwUnsupported + -- TODO(kmill): use after stage0 update + -- | `(notationItem| $u:unicodeAtom) => `(stx| $u:unicodeAtom) + -- | _ => Macro.throwUnsupported + | stx => + if stx.raw.isOfKind ``Parser.Syntax.unicodeAtom then + return ⟨stx.raw⟩ + else + Macro.throwUnsupported /-- Convert `notation` command lhs item into a pattern element -/ def expandNotationItemIntoPattern (stx : Syntax) : MacroM Syntax := let k := stx.getKind - if k == `Lean.Parser.Command.identPrec then + if k == ``Lean.Parser.Command.identPrec then return mkAntiquotNode stx[0] (kind := `term) (isPseudoKind := true) else if k == strLitKind then strLitToPattern stx + else if k == ``Parser.Syntax.unicodeAtom then + let preserveForPP := !stx[4].isNone + if preserveForPP then + -- Use the ascii atom for the pattern; that way delaboration gives the ASCII version, + -- which is the preferred form according to `ParserDescr.unicodeSymbol`. + strLitToPattern stx[3] + else + strLitToPattern stx[1] else Macro.throwUnsupported diff --git a/src/Lean/Elab/Syntax.lean b/src/Lean/Elab/Syntax.lean index f5459abb5e..dcfa822013 100644 --- a/src/Lean/Elab/Syntax.lean +++ b/src/Lean/Elab/Syntax.lean @@ -136,6 +136,8 @@ where processAtom stx else if kind == ``Lean.Parser.Syntax.nonReserved then processNonReserved stx + else if kind == ``Lean.Parser.Syntax.unicodeAtom then + processUnicode stx else let stxNew? ← liftM (liftMacroM (expandMacro? stx) : TermElabM _) match stxNew? with @@ -248,23 +250,30 @@ where !s.front.isDigit && !(s.any Char.isWhitespace) + validAtom (stx : Syntax) : ToParserDescrM String := do + let some atom := stx.isStrLit? | throwUnsupportedSyntax + unless isValidAtom atom do + throwErrorAt stx "invalid atom" + return atom + processAtom (stx : Syntax) := do - match stx[0].isStrLit? with - | some atom => - unless isValidAtom atom do - throwErrorAt stx "invalid atom" - /- For syntax categories where initialized with `LeadingIdentBehavior` different from default (e.g., `tactic`), we automatically mark - the first symbol as nonReserved. -/ - if (← read).behavior != Parser.LeadingIdentBehavior.default && (← read).first then - return (← `(ParserDescr.nonReservedSymbol $(quote atom) false), 1) - else - return (← `(ParserDescr.symbol $(quote atom)), 1) - | none => throwUnsupportedSyntax + let atom ← validAtom stx[0] + /- For syntax categories where initialized with `LeadingIdentBehavior` different from default (e.g., `tactic`), we automatically mark + the first symbol as nonReserved. -/ + if (← read).behavior != Parser.LeadingIdentBehavior.default && (← read).first then + return (← `(ParserDescr.nonReservedSymbol $(quote atom) false), 1) + else + return (← `(ParserDescr.symbol $(quote atom)), 1) processNonReserved (stx : Syntax) := do - let some atom := stx[1].isStrLit? | throwUnsupportedSyntax + let atom ← validAtom stx[1] return (← `((with_annotate_term $(stx[0]) @ParserDescr.nonReservedSymbol) $(quote atom) false), 1) + processUnicode (stx : Syntax) := do + let atom ← validAtom stx[1] + let asciiAtom ← validAtom stx[3] + let preserveForPP := !stx[4].isNone + return (← `((with_annotate_term $(stx[0]) @ParserDescr.unicodeSymbol) $(quote atom) $(quote asciiAtom) $(quote preserveForPP)), 1) end Term diff --git a/src/Lean/Parser.lean b/src/Lean/Parser.lean index 649c64602c..b1af9e676a 100644 --- a/src/Lean/Parser.lean +++ b/src/Lean/Parser.lean @@ -92,6 +92,7 @@ unsafe def interpretParserDescr : ParserDescr → CoreM Parenthesizer | ParserDescr.trailingNode k prec lhsPrec d => return trailingNode.parenthesizer k prec lhsPrec (← interpretParserDescr d) | ParserDescr.symbol tk => return symbol.parenthesizer tk | ParserDescr.nonReservedSymbol tk includeIdent => return nonReservedSymbol.parenthesizer tk includeIdent + | ParserDescr.unicodeSymbol tk asciiTk preserve => return unicodeSymbol.parenthesizer tk asciiTk preserve | ParserDescr.parser constName => combinatorParenthesizerAttribute.runDeclFor constName | ParserDescr.cat catName prec => return categoryParser.parenthesizer catName prec @@ -124,6 +125,7 @@ unsafe def interpretParserDescr : ParserDescr → CoreM Formatter | ParserDescr.trailingNode k prec lhsPrec d => return trailingNode.formatter k prec lhsPrec (← interpretParserDescr d) | ParserDescr.symbol tk => return symbol.formatter tk | ParserDescr.nonReservedSymbol tk _ => return nonReservedSymbol.formatter tk + | ParserDescr.unicodeSymbol tk asciiTk preserve => return unicodeSymbol.formatter tk asciiTk preserve | ParserDescr.parser constName => combinatorFormatterAttribute.runDeclFor constName | ParserDescr.cat catName _ => return categoryParser.formatter catName diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index dcc3b7b5a0..8a2290cb4f 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -1229,7 +1229,8 @@ def unicodeSymbolInfo (sym asciiSym : String) : ParserInfo := { def unicodeSymbolFn (sym asciiSym : String) : ParserFn := unicodeSymbolFnAux sym asciiSym ["'" ++ sym ++ "', '" ++ asciiSym ++ "'"] -def unicodeSymbolNoAntiquot (sym asciiSym : String) : Parser := +set_option linter.unusedVariables false in +def unicodeSymbolNoAntiquot (sym asciiSym : String) (preserveForPP : Bool) : Parser := let sym := sym.trim let asciiSym := asciiSym.trim { info := unicodeSymbolInfo sym asciiSym @@ -1786,8 +1787,19 @@ instance : Coe String Parser where def nonReservedSymbol (sym : String) (includeIdent := false) : Parser := tokenWithAntiquot (nonReservedSymbolNoAntiquot sym includeIdent) -def unicodeSymbol (sym asciiSym : String) : Parser := - tokenWithAntiquot (unicodeSymbolNoAntiquot sym asciiSym) +/-- +`unicodeSymbol sym asciiSym` parses either `sym` or `asciiSym` as a reserved symbol. +The `asciiSym` argument should be an ASCII alternative for `sym`. + +- If the `pp.unicode` option is false, then pretty prints using `asciiSym`. +- If the `pp.unicode` option is true, then pretty prints as `sym`, + unless the `preserveForPP` argument is true, in which case the underlying atom + is used to decide whether to print using the unicode or ASCII form. + We take the ASCII form to be the preferred form in this case. + For example, `pp.unicode.fun` causes the delaborator to use `↦` instead of `=>` for the `fun` arrow. +-/ +def unicodeSymbol (sym asciiSym : String) (preserveForPP : Bool := false) : Parser := + tokenWithAntiquot (unicodeSymbolNoAntiquot sym asciiSym preserveForPP) /-- Define parser for `$e` (if `anonymous == true`) and `$e:name`. diff --git a/src/Lean/Parser/Extension.lean b/src/Lean/Parser/Extension.lean index 49f8bd4ebe..85740b4d60 100644 --- a/src/Lean/Parser/Extension.lean +++ b/src/Lean/Parser/Extension.lean @@ -291,6 +291,7 @@ partial def compileParserDescr (categories : ParserCategories) (d : ParserDescr) | ParserDescr.trailingNode k prec lhsPrec d => return trailingNode k prec lhsPrec (← visit d) | ParserDescr.symbol tk => return symbol tk | ParserDescr.nonReservedSymbol tk includeIdent => return nonReservedSymbol tk includeIdent + | ParserDescr.unicodeSymbol tk asciiTk preserve => return unicodeSymbol tk asciiTk preserve | ParserDescr.parser constName => do let (_, p) ← mkParserOfConstantAux constName visit; pure p diff --git a/src/Lean/Parser/Syntax.lean b/src/Lean/Parser/Syntax.lean index ed96795b8d..6293d8f144 100644 --- a/src/Lean/Parser/Syntax.lean +++ b/src/Lean/Parser/Syntax.lean @@ -48,10 +48,15 @@ namespace Syntax @[builtin_syntax_parser] def sepBy1 := leading_parser "sepBy1(" >> withoutPosition (many1 syntaxParser >> ", " >> strLit >> optional (", " >> many1 syntaxParser) >> optional (", " >> nonReservedSymbol "allowTrailingSep")) >> ")" -@[builtin_syntax_parser] def atom := leading_parser +@[builtin_syntax_parser, inherit_doc ParserDescr.symbol] +def atom := leading_parser strLit -@[builtin_syntax_parser] def nonReserved := leading_parser +@[builtin_syntax_parser, inherit_doc ParserDescr.nonReservedSymbol] +def nonReserved := leading_parser "&" >> strLit +@[builtin_syntax_parser, inherit_doc ParserDescr.unicodeSymbol] +def unicodeAtom := leading_parser + "unicode(" >> strLit >> ", " >> strLit >> optional (", " >> nonReservedSymbol "preserveForPP") >> ")" end Syntax @@ -61,25 +66,39 @@ def namedName := leading_parser atomic (" (" >> nonReservedSymbol "name") >> " := " >> ident >> ")" def optNamedName := optional namedName +def identPrec := leading_parser ident >> optPrecedence +def notationItem := withAntiquot (mkAntiquot "notationItem" decl_name%) (strLit <|> Syntax.unicodeAtom <|> identPrec) +/-- +`prefix:prec "op" => f` is equivalent to `notation:prec "op" x:prec => f x`. +-/ def «prefix» := leading_parser "prefix" +/-- +`infix:prec "op" => f` is equivalent to `notation:prec x:prec1 "op" y:prec1 => f x y`, where `prec1 := prec + 1`. +-/ def «infix» := leading_parser "infix" +/-- +`infixl:prec "op" => f` is equivalent to `notation:prec x:prec "op" y:prec1 => f x y`, where `prec1 := prec + 1`. +-/ def «infixl» := leading_parser "infixl" +/-- +`infixr:prec "op" => f` is equivalent to `notation:prec x:prec1 "op" y:prec => f x y`, where `prec1 := prec + 1`. +-/ def «infixr» := leading_parser "infixr" +/-- +`postfix:prec "op" => f` is equivalent to `notation:prec x:prec "op" => f x`. +-/ def «postfix» := leading_parser "postfix" def mixfixKind := «prefix» <|> «infix» <|> «infixl» <|> «infixr» <|> «postfix» @[builtin_command_parser] def «mixfix» := leading_parser optional docComment >> optional Term.«attributes» >> Term.attrKind >> mixfixKind >> - precedence >> optNamedName >> optNamedPrio >> ppSpace >> strLit >> darrow >> termParser --- NOTE: We use `suppressInsideQuot` in the following parsers because quotations inside them are evaluated in the same stage and --- thus should be ignored when we use `checkInsideQuot` to prepare the next stage for a builtin syntax change -def identPrec := leading_parser ident >> optPrecedence - -def optKind : Parser := optional (" (" >> nonReservedSymbol "kind" >> ":=" >> ident >> ")") - -def notationItem := ppSpace >> withAntiquot (mkAntiquot "notationItem" decl_name%) (strLit <|> identPrec) + precedence >> optNamedName >> optNamedPrio >> ppSpace >> notationItem >> darrow >> termParser @[builtin_command_parser] def «notation» := leading_parser optional docComment >> optional Term.«attributes» >> Term.attrKind >> - "notation" >> optPrecedence >> optNamedName >> optNamedPrio >> many notationItem >> darrow >> termParser + "notation" >> optPrecedence >> optNamedName >> optNamedPrio >> many (ppSpace >> notationItem) >> darrow >> termParser + +-- NOTE: We use `suppressInsideQuot` in the following parsers because quotations inside them are evaluated in the same stage and +-- thus should be ignored when we use `checkInsideQuot` to prepare the next stage for a builtin syntax change +def optKind : Parser := optional (" (" >> nonReservedSymbol "kind" >> ":=" >> ident >> ")") @[builtin_command_parser] def «macro_rules» := suppressInsideQuot <| leading_parser optional docComment >> optional Term.«attributes» >> Term.attrKind >> "macro_rules" >> optKind >> Term.matchAlts diff --git a/src/Lean/Parser/Term.lean b/src/Lean/Parser/Term.lean index b2b9cdcc7a..30736df07b 100644 --- a/src/Lean/Parser/Term.lean +++ b/src/Lean/Parser/Term.lean @@ -548,9 +548,9 @@ def funBinder : Parser := -- NOTE: we disable anonymous antiquotations to ensure that `fun $b => ...` -- remains a `term` antiquotation def basicFun : Parser := leading_parser (withAnonymousAntiquot := false) - ppGroup (many1 (ppSpace >> funBinder) >> optType >> unicodeSymbol " ↦" " =>") >> ppSpace >> termParser + ppGroup (many1 (ppSpace >> funBinder) >> optType >> unicodeSymbol " ↦" " =>" (preserveForPP := true)) >> ppSpace >> termParser @[builtin_term_parser] def «fun» := leading_parser:maxPrec - ppAllowUngrouped >> unicodeSymbol "λ" "fun" >> (basicFun <|> matchAlts) + ppAllowUngrouped >> unicodeSymbol "λ" "fun" (preserveForPP := true) >> (basicFun <|> matchAlts) def optExprPrecedence := optional (atomic ":" >> termParser maxPrec) def withAnonymousAntiquot := leading_parser diff --git a/src/Lean/ParserCompiler.lean b/src/Lean/ParserCompiler.lean index b80634501b..60351ee131 100644 --- a/src/Lean/ParserCompiler.lean +++ b/src/Lean/ParserCompiler.lean @@ -141,6 +141,7 @@ def compileEmbeddedParsers : ParserDescr → MetaM Unit | ParserDescr.trailingNode _ _ _ d => compileEmbeddedParsers d | ParserDescr.symbol _ => pure () | ParserDescr.nonReservedSymbol _ _ => pure () + | ParserDescr.unicodeSymbol _ _ _ => pure () | ParserDescr.cat _ _ => pure () /-- Precondition: `α` must match `ctx.tyName`. -/ diff --git a/src/Lean/PrettyPrinter/Delaborator/Options.lean b/src/Lean/PrettyPrinter/Delaborator/Options.lean index 59cec85de6..6f21bd0893 100644 --- a/src/Lean/PrettyPrinter/Delaborator/Options.lean +++ b/src/Lean/PrettyPrinter/Delaborator/Options.lean @@ -33,10 +33,15 @@ register_builtin_option pp.parens : Bool := { group := "pp" descr := "(pretty printer) if set to true, notation is wrapped in parentheses regardless of precedence" } +register_builtin_option pp.unicode : Bool := { + defValue := true + group := "pp" + descr := "(pretty printer) if set to false, avoid using non-unicode symbols when formatting" +} register_builtin_option pp.unicode.fun : Bool := { defValue := false group := "pp" - descr := "(pretty printer) disable/enable unicode ↦ notation for functions" + descr := "(pretty printer) if set to true, use unicode `↦` notation for functions" } register_builtin_option pp.match : Bool := { defValue := true @@ -280,7 +285,8 @@ def getPPExplicit (o : Options) : Bool := o.get pp.explicit.name (getPPAll o) def getPPForalls (o : Options) : Bool := o.get pp.foralls.name pp.foralls.defValue def getPPNotation (o : Options) : Bool := o.get pp.notation.name (!getPPAll o) def getPPParens (o : Options) : Bool := o.get pp.parens.name pp.parens.defValue -def getPPUnicodeFun (o : Options) : Bool := o.get pp.unicode.fun.name false +def getPPUnicode (o : Options) : Bool := o.get pp.unicode.name pp.unicode.defValue +def getPPUnicodeFun (o : Options) : Bool := o.get pp.unicode.fun.name pp.unicode.fun.defValue def getPPMatch (o : Options) : Bool := o.get pp.match.name (!getPPAll o) def getPPSorrySource (o : Options) : Bool := o.get pp.sorrySource.name pp.sorrySource.defValue def getPPFieldNotation (o : Options) : Bool := o.get pp.fieldNotation.name (!getPPAll o) diff --git a/src/Lean/PrettyPrinter/Formatter.lean b/src/Lean/PrettyPrinter/Formatter.lean index 6f3d035657..0aabdaf952 100644 --- a/src/Lean/PrettyPrinter/Formatter.lean +++ b/src/Lean/PrettyPrinter/Formatter.lean @@ -12,6 +12,7 @@ public import Lean.Parser.StrInterpolation public import Lean.KeyedDeclsAttribute public import Lean.ParserCompiler.Attribute public import Lean.PrettyPrinter.Basic +public import Lean.PrettyPrinter.Delaborator.Options public section @@ -437,11 +438,11 @@ def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit @[combinator_formatter symbolNoAntiquot, expose] def symbolNoAntiquot.formatter (sym : String) : Formatter := do let stx ← getCur - if stx.isToken sym then do - let (Syntax.atom info _) ← pure stx | unreachable! - withMaybeTag (getExprPos? stx) (pushToken info sym false) + if stx.isToken sym then + let (Syntax.atom info _) := stx | unreachable! + withMaybeTag (getExprPos? stx) <| pushToken info sym false goLeft - else do + else trace[PrettyPrinter.format.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}'" throwBacktrack @@ -453,14 +454,21 @@ def symbolNoAntiquot.formatter (sym : String) : Formatter := do symbolNoAntiquot.formatter ch.toString @[combinator_formatter unicodeSymbolNoAntiquot, expose] -def unicodeSymbolNoAntiquot.formatter (sym asciiSym : String) : Formatter := do - let Syntax.atom info val ← getCur - | throwError m!"not an atom: {← getCur}" - if val == sym.trim then - pushToken info sym false +def unicodeSymbolNoAntiquot.formatter (sym asciiSym : String) (preserveForPP : Bool) : Formatter := do + let stx ← getCur + let usesUnicode := stx.isToken sym + let usesAscii := stx.isToken asciiSym + if usesUnicode || usesAscii then + let (Syntax.atom info _) := stx | unreachable! + -- Use unicode version if pp.unicode is enabled and either preserveForPP is false or the syntax contains the unicode version + if getPPUnicode (← getOptions) && (!preserveForPP || usesUnicode) then + withMaybeTag (getExprPos? stx) <| pushToken info sym false + else + withMaybeTag (getExprPos? stx) <| pushToken info asciiSym false + goLeft else - pushToken info asciiSym false - goLeft + trace[PrettyPrinter.format.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}' or '{asciiSym}'" + throwBacktrack @[combinator_formatter identNoAntiquot, expose] def identNoAntiquot.formatter : Formatter := do diff --git a/src/Lean/PrettyPrinter/Parenthesizer.lean b/src/Lean/PrettyPrinter/Parenthesizer.lean index c63df3f917..6692674f96 100644 --- a/src/Lean/PrettyPrinter/Parenthesizer.lean +++ b/src/Lean/PrettyPrinter/Parenthesizer.lean @@ -469,13 +469,25 @@ def trailingNode.parenthesizer (k : SyntaxNodeKind) (prec lhsPrec : Nat) (p : Pa @[combinator_parenthesizer rawCh, expose] def rawCh.parenthesizer (_ch : Char) := visitToken -@[combinator_parenthesizer symbolNoAntiquot, expose] def symbolNoAntiquot.parenthesizer (_sym : String) := visitToken -@[combinator_parenthesizer unicodeSymbolNoAntiquot, expose] def unicodeSymbolNoAntiquot.parenthesizer (_sym _asciiSym : String) := visitToken +@[combinator_parenthesizer symbolNoAntiquot, expose] def symbolNoAntiquot.parenthesizer (sym : String) := do + let stx ← getCur + if stx.isToken sym then + visitToken + else + trace[PrettyPrinter.parenthesize.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}'" + throwBacktrack +@[combinator_parenthesizer nonReservedSymbolNoAntiquot, expose] def nonReservedSymbolNoAntiquot.parenthesizer := symbolNoAntiquot.parenthesizer +@[combinator_parenthesizer unicodeSymbolNoAntiquot, expose] def unicodeSymbolNoAntiquot.parenthesizer (sym asciiSym : String) (_preserveForPP : Bool) := do + let stx ← getCur + if stx.isToken sym || stx.isToken asciiSym then + visitToken + else + trace[PrettyPrinter.parenthesize.backtrack] "unexpected syntax '{format stx}', expected symbol '{sym}' or '{asciiSym}'" + throwBacktrack @[combinator_parenthesizer identNoAntiquot, expose] def identNoAntiquot.parenthesizer := do checkKind identKind; visitToken @[combinator_parenthesizer rawIdentNoAntiquot, expose] def rawIdentNoAntiquot.parenthesizer := visitToken @[combinator_parenthesizer identEq, expose] def identEq.parenthesizer (_id : Name) := visitToken -@[combinator_parenthesizer nonReservedSymbolNoAntiquot, expose] def nonReservedSymbolNoAntiquot.parenthesizer (_sym : String) (_includeIdent : Bool) := visitToken @[combinator_parenthesizer charLitNoAntiquot, expose] def charLitNoAntiquot.parenthesizer := visitToken @[combinator_parenthesizer strLitNoAntiquot, expose] def strLitNoAntiquot.parenthesizer := visitToken diff --git a/tests/lean/242.lean b/tests/lean/242.lean index 56d5698371..21de4f1708 100644 --- a/tests/lean/242.lean +++ b/tests/lean/242.lean @@ -6,3 +6,12 @@ syntax " 0" : term syntax " 'a'" : term syntax " `a" : term syntax " \"a" : term + +syntax &"0" : term +syntax &"'a'" : term +syntax &"`a" : term +syntax &"\"a" : term +syntax &" 0" : term +syntax &" 'a'" : term +syntax &" `a" : term +syntax &" \"a" : term diff --git a/tests/lean/242.lean.expected.out b/tests/lean/242.lean.expected.out index 39f7550ca2..af47bde186 100644 --- a/tests/lean/242.lean.expected.out +++ b/tests/lean/242.lean.expected.out @@ -6,3 +6,11 @@ 242.lean:6:7-6:13: error: invalid atom 242.lean:7:7-7:12: error: invalid atom 242.lean:8:7-8:13: error: invalid atom +242.lean:10:8-10:11: error: invalid atom +242.lean:11:8-11:13: error: invalid atom +242.lean:12:8-12:12: error: invalid atom +242.lean:13:8-13:13: error: invalid atom +242.lean:14:8-14:12: error: invalid atom +242.lean:15:8-15:14: error: invalid atom +242.lean:16:8-16:13: error: invalid atom +242.lean:17:8-17:14: error: invalid atom diff --git a/tests/lean/run/ppUnicode.lean b/tests/lean/run/ppUnicode.lean new file mode 100644 index 0000000000..a30b22bc7e --- /dev/null +++ b/tests/lean/run/ppUnicode.lean @@ -0,0 +1,80 @@ +import Lean +/-! +# Tests of `pp.unicode` pretty printer option +-/ + +open Lean PrettyPrinter + +/-! +Testing a notation with a `unicode` operator. + +Respects the current setting of `pp.unicode` exactly. +-/ +def And' := And + +notation:35 x:36 unicode(" ∧' ", " /\\' ") y:35 => And' x y + +/-- info: True ∧' False : Prop -/ +#guard_msgs in #check True ∧' False + +/-- info: True /\' False : Prop -/ +#guard_msgs in set_option pp.unicode false in #check True ∧' False + +/-- info: True✝ ∧' False✝ -/ +#guard_msgs in #eval do ppTerm (← `(True ∧' False)) +/-- info: True✝ ∧' False✝ -/ +#guard_msgs in #eval do ppTerm (← `(True /\' False)) +/-- info: True✝ /\' False✝ -/ +#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True ∧' False)) +/-- info: True✝ /\' False✝ -/ +#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True /\' False)) + +/-! +Testing a notation with a `unicode` operator with `preserveForPP`. + +Respects the current setting of `pp.unicode` *if* the underlying atom is in the unicode form. +The `notation` command creates a pretty printer using the ASCII form, +so this is only possible if a delaborator creates a syntax quotation with the unicode form. +-/ +def And'' := And + +notation:35 x:36 unicode(" ∧'' ", " /\\'' ", preserveForPP) y:35 => And'' x y + +/-- info: True /\'' False : Prop -/ +#guard_msgs in #check True ∧'' False + +/-- info: True /\'' False : Prop -/ +#guard_msgs in set_option pp.unicode false in #check True ∧'' False + +/-- info: True✝ ∧'' False✝ -/ +#guard_msgs in #eval do ppTerm (← `(True ∧'' False)) +/-- info: True✝ /\'' False✝ -/ +#guard_msgs in #eval do ppTerm (← `(True /\'' False)) +/-- info: True✝ /\'' False✝ -/ +#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True ∧'' False)) +/-- info: True✝ /\'' False✝ -/ +#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(True /\'' False)) + +/-! +The `fun` notation uses `preserveForPP`. +Whne `pp.unicode.fun` is true, its elaborator uses `↦`. +-/ +/-- info: fun x => x : ?m.1 → ?m.1 -/ +#guard_msgs in #check fun x => x + +/-- info: fun x ↦ x : ?m.1 → ?m.1 -/ +#guard_msgs in set_option pp.unicode.fun true in #check fun x => x + +/-- info: fun x => x : ?m.1 -> ?m.1 -/ +#guard_msgs in set_option pp.unicode.fun true in set_option pp.unicode false in #check fun x => x + +-- The delaborator is what uses `↦`, so the option has no effect here. +/-- info: fun x✝ => x✝ -/ +#guard_msgs in set_option pp.unicode.fun true in #eval do ppTerm (← `(fun x => x)) + +/-- info: fun x✝ => x✝ -/ +#guard_msgs in #eval do ppTerm (← `(fun x => x)) +/-- info: fun x✝ ↦ x✝ -/ +#guard_msgs in #eval do ppTerm (← `(fun x ↦ x)) +/-- info: fun x✝ => x✝ -/ +#guard_msgs in set_option pp.unicode false in #eval do ppTerm (← `(fun x ↦ x))