lean4-htt/src/Lean/Parser/Extra.lean
2025-10-16 20:27:46 +00:00

353 lines
18 KiB
Text

/-
Copyright (c) 2019 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura, Sebastian Ullrich
-/
module
prelude
public import Lean.PrettyPrinter.Formatter
public import Lean.PrettyPrinter.Parenthesizer
meta import Lean.Hygiene
-- for `run_builtin_parser_attribute_hooks`
import all Lean.Parser.Types
import all Lean.Parser.Basic
import all Lean.Parser.Extension
public section
namespace Lean
namespace Parser
-- synthesize pretty printers for parsers declared prior to `Lean.PrettyPrinter`
-- (because `Parser.Extension` depends on them)
attribute [run_builtin_parser_attribute_hooks]
leadingNode termParser commandParser mkAntiquot nodeWithAntiquot sepBy sepBy1
unicodeSymbol nonReservedSymbol
withCache withResetCache withPosition withPositionAfterLinebreak withoutPosition withForbidden withoutForbidden setExpected
incQuotDepth decQuotDepth suppressInsideQuot evalInsideQuot
withOpen withOpenDecl
dbgTraceState
/-- The parser `optional(p)`, or `(p)?`, parses `p` if it succeeds,
otherwise it succeeds with no value.
Note that because `?` is a legal identifier character, one must write `(p)?` or `p ?` for
it to parse correctly. `ident?` will not work; one must write `(ident)?` instead.
This parser has arity 1: it produces a `nullKind` node containing either zero arguments
(for the `none` case) or the list of arguments produced by `p`.
(In particular, if `p` has arity 0 then the two cases are not differentiated!) -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def optional (p : Parser) : Parser :=
optionalNoAntiquot (withAntiquotSpliceAndSuffix `optional p (symbol "?"))
/-- The parser `many(p)`, or `p*`, repeats `p` until it fails, and returns the list of results.
The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be
automatically replaced by `group(p)` to ensure that it produces exactly 1 value.
This parser has arity 1: it produces a `nullKind` node containing one argument for each
invocation of `p` (or `group(p)`). -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def many (p : Parser) : Parser :=
manyNoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*"))
/-- The parser `many1(p)`, or `p+`, repeats `p` until it fails, and returns the list of results.
`p` must succeed at least once, or this parser will fail.
Note that this parser produces the same parse tree as the `many(p)` / `p*` combinator,
and one matches both `p*` and `p+` using `$[ .. ]*` syntax in a syntax match.
(There is no `$[ .. ]+` syntax.)
The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be
automatically replaced by `group(p)` to ensure that it produces exactly 1 value.
This parser has arity 1: it produces a `nullKind` node containing one argument for each
invocation of `p` (or `group(p)`). -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def many1 (p : Parser) : Parser :=
many1NoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*"))
/-- The parser `ident` parses a single identifier, possibly with namespaces, such as `foo` or
`bar.baz`. The identifier must not be a declared token, so for example it will not match `"def"`
because `def` is a keyword token. Tokens are implicitly declared by using them in string literals
in parser declarations, so `syntax foo := "bla"` will make `bla` no longer legal as an identifier.
Identifiers can contain special characters or keywords if they are escaped using the `«»` characters:
`«def»` is an identifier named `def`, and `«x»` is treated the same as `x`. This is useful for
using disallowed characters in identifiers such as `«foo.bar».baz` or `«hello world»`.
This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier.
You can use `TSyntax.getId` to extract the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def ident : Parser :=
withAntiquot (mkAntiquot "ident" identKind) identNoAntiquot
-- `optional (checkNoWsBefore >> "." >> checkNoWsBefore >> ident)`
-- can never fully succeed but ensures that the identifier
-- produces a partial syntax that contains the dot.
-- The partial syntax is sometimes useful for dot-auto-completion.
@[run_builtin_parser_attribute_hooks, builtin_doc] def identWithPartialTrailingDot :=
ident >> optional (checkNoWsBefore >> "." >> checkNoWsBefore >> ident)
-- `ident` and `rawIdent` produce the same syntax tree, so we reuse the antiquotation kind name
@[run_builtin_parser_attribute_hooks, builtin_doc] def rawIdent : Parser :=
withAntiquot (mkAntiquot "ident" identKind) rawIdentNoAntiquot
/--
The parser `hygieneInfo` parses no text, but creates a `hygieneInfoKind` node
containing an anonymous identifier as if it were parsed at the current position.
This identifier is modified by syntax quotations to add macro scopes like a regular identifier.
This is used to implement `have := ...` syntax: the `hygieneInfo` between the `have` and `:=`
collects macro scopes, which we can apply to `this` when expanding to `have this := ...`.
See [the language reference](lean-manual://section/macro-hygiene) for more information about
macro hygiene.
This is also used to implement cdot functions such as `(1 + ·)`. The opening parenthesis contains
a `hygieneInfo` node as does the cdot, which lets cdot expansion hygienically associate parentheses to cdots.
This parser has arity 1: it produces a `hygieneInfoKind` node containing an anonymous `Syntax.ident`.
You can use `HygieneInfo.mkIdent` to create an `Ident` from the syntax object,
but you can also use `TSyntax.getHygieneInfo` to get the raw name from the identifier. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def hygieneInfo : Parser :=
withAntiquot (mkAntiquot "hygieneInfo" hygieneInfoKind (anonymous := false)) hygieneInfoNoAntiquot
/-- The parser `num` parses a numeric literal in several bases:
* Decimal: `129`
* Hexadecimal: `0xdeadbeef`
* Octal: `0o755`
* Binary: `0b1101`
This parser has arity 1: it produces a `numLitKind` node containing an atom with the text of the
literal.
You can use `TSyntax.getNat` to extract the number from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def numLit : Parser :=
withAntiquot (mkAntiquot "num" numLitKind) numLitNoAntiquot
/-- The parser `hexnum` parses a hexadecimal numeric literal not containing the `0x` prefix.
It produces a `hexnumKind` node containing an atom with the text of the
literal. This parser is mainly used for creating atoms such `#<hexnum>`. Recall that `hexnum`
is not a token and this parser must be prefixed by another parser.
For numerals such as `0xadef100a`, you should use `numLit`.
-/
@[builtin_doc] def hexnum : Parser :=
withAntiquot (mkAntiquot "hexnum" hexnumKind) hexnumNoAntiquot
/-- The parser `scientific` parses a scientific-notation literal, such as `1.3e-24`.
This parser has arity 1: it produces a `scientificLitKind` node containing an atom with the text
of the literal.
You can use `TSyntax.getScientific` to extract the parts from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def scientificLit : Parser :=
withAntiquot (mkAntiquot "scientific" scientificLitKind) scientificLitNoAntiquot
/-- The parser `str` parses a string literal, such as `"foo"` or `"\r\n"`. Strings can contain
C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode characters like `∈`.
Newlines in a string are interpreted literally.
This parser has arity 1: it produces a `strLitKind` node containing an atom with the raw
literal (including the quote marks and without interpreting the escapes).
You can use `TSyntax.getString` to decode the string from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def strLit : Parser :=
withAntiquot (mkAntiquot "str" strLitKind) strLitNoAntiquot
/-- The parser `char` parses a character literal, such as `'a'` or `'\n'`. Character literals can
contain C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode characters
like `∈`, but must evaluate to a single unicode codepoint, so `'♥'` is allowed but `'❤️'` is not
(since it is two codepoints but one grapheme cluster).
This parser has arity 1: it produces a `charLitKind` node containing an atom with the raw
literal (including the quote marks and without interpreting the escapes).
You can use `TSyntax.getChar` to decode the string from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def charLit : Parser :=
withAntiquot (mkAntiquot "char" charLitKind) charLitNoAntiquot
/-- The parser `name` parses a name literal like `` `foo``. The syntax is the same as for identifiers
(see `ident`) but with a leading backquote.
This parser has arity 1: it produces a `nameLitKind` node containing the raw literal
(including the backquote).
You can use `TSyntax.getName` to extract the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def nameLit : Parser :=
withAntiquot (mkAntiquot "name" nameLitKind) nameLitNoAntiquot
/-- The parser `group(p)` parses the same thing as `p`, but it wraps the results in a `groupKind`
node.
This parser always has arity 1, even if `p` does not. Parsers like `p*` are automatically
rewritten to `group(p)*` if `p` does not have arity 1, so that the results from separate invocations
of `p` can be differentiated. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline] def group (p : Parser) : Parser :=
node groupKind p
/-- The parser `many1Indent(p)` is equivalent to `withPosition((colGe p)+)`. This has the effect of
parsing one or more occurrences of `p`, where each subsequent `p` parse needs to be indented
the same or more than the first parse.
This parser has arity 1, and returns a list of the results from `p`.
`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline] def many1Indent (p : Parser) : Parser :=
withPosition $ many1 (checkColGe "irrelevant" >> p)
/-- The parser `manyIndent(p)` is equivalent to `withPosition((colGe p)*)`. This has the effect of
parsing zero or more occurrences of `p`, where each subsequent `p` parse needs to be indented
the same or more than the first parse.
This parser has arity 1, and returns a list of the results from `p`.
`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline] def manyIndent (p : Parser) : Parser :=
withPosition $ many (checkColGe "irrelevant" >> p)
@[builtin_doc, inline] def sepByIndent (p : Parser) (sep : String) (psep : Parser := symbol sep) (allowTrailingSep : Bool := false) : Parser :=
let p := withAntiquotSpliceAndSuffix `sepBy p (symbol "*")
withPosition $ sepBy (checkColGe "irrelevant" >> p) sep (psep <|> checkColEq "irrelevant" >> checkLinebreakBefore >> pushNone) allowTrailingSep
@[builtin_doc, inline] def sepBy1Indent (p : Parser) (sep : String) (psep : Parser := symbol sep) (allowTrailingSep : Bool := false) : Parser :=
let p := withAntiquotSpliceAndSuffix `sepBy p (symbol "*")
withPosition $ sepBy1 (checkColGe "irrelevant" >> p) sep (psep <|> checkColEq "irrelevant" >> checkLinebreakBefore >> pushNone) allowTrailingSep
open PrettyPrinter Syntax.MonadTraverser Formatter in
@[combinator_formatter sepByIndent, expose]
def sepByIndent.formatter (p : Formatter) (_sep : String) (pSep : Formatter) : Formatter := do
let stx ← getCur
let hasNewlineSep := stx.getArgs.mapIdx (fun i n =>
i % 2 == 1 && n.matchesNull 0 && i != stx.getArgs.size - 1) |>.any id
visitArgs do
for i in (List.range stx.getArgs.size).reverse do
if i % 2 == 0 then p else pSep <|>
-- If the final separator is a newline, skip it.
((if i == stx.getArgs.size - 1 then pure () else pushWhitespace "\n") *> goLeft)
-- If there is any newline separator, then we add an `align` at the start
-- so that `withPosition` will pick up the right column.
if hasNewlineSep then
pushAlign (force := true)
@[combinator_formatter sepBy1Indent, expose] def sepBy1Indent.formatter := sepByIndent.formatter
attribute [run_builtin_parser_attribute_hooks] sepByIndent sepBy1Indent
@[run_builtin_parser_attribute_hooks, builtin_doc] abbrev notSymbol (s : String) : Parser :=
notFollowedBy (symbol s) s
/-- No-op parser combinator that annotates subtrees to be ignored in syntax patterns. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline]
def patternIgnore : Parser → Parser := node `patternIgnore
/-- No-op parser that advises the pretty printer to emit a non-breaking space. -/
@[builtin_doc, inline] def ppHardSpace : Parser := skip
/-- No-op parser that advises the pretty printer to emit a space/soft line break. -/
@[builtin_doc, inline] def ppSpace : Parser := skip
/-- No-op parser that advises the pretty printer to emit a hard line break. -/
@[builtin_doc, inline] def ppLine : Parser := skip
/-- No-op parser combinator that advises the pretty printer to emit a `Format.fill` node. -/
@[builtin_doc, inline] def ppRealFill : Parser → Parser := id
/-- No-op parser combinator that advises the pretty printer to emit a `Format.group` node. -/
@[builtin_doc, inline] def ppRealGroup : Parser → Parser := id
/-- No-op parser combinator that advises the pretty printer to indent the given syntax without grouping it. -/
@[builtin_doc, inline] def ppIndent : Parser → Parser := id
/--
No-op parser combinator that advises the pretty printer to group and indent the given syntax.
By default, only syntax categories are grouped. -/
@[builtin_doc, inline] def ppGroup (p : Parser) : Parser := ppRealFill (ppIndent p)
/--
No-op parser combinator that advises the pretty printer to dedent the given syntax.
Dedenting can in particular be used to counteract automatic indentation. -/
@[builtin_doc, inline] def ppDedent : Parser → Parser := id
/--
No-op parser combinator that allows the pretty printer to omit the group and
indent operation in the enclosing category parser.
```
syntax ppAllowUngrouped "by " tacticSeq : term
-- allows a `by` after `:=` without linebreak in between:
theorem foo : True := by
trivial
```
-/
@[builtin_doc, inline] def ppAllowUngrouped : Parser := skip
/--
No-op parser combinator that advises the pretty printer to dedent the given syntax,
if it was grouped by the category parser.
Dedenting can in particular be used to counteract automatic indentation. -/
@[builtin_doc, inline] def ppDedentIfGrouped : Parser → Parser := id
/--
No-op parser combinator that prints a line break.
The line break is soft if the combinator is followed
by an ungrouped parser (see ppAllowUngrouped), otherwise hard. -/
@[builtin_doc, inline] def ppHardLineUnlessUngrouped : Parser := skip
end Parser
section
open PrettyPrinter Parser
@[combinator_formatter ppHardSpace, expose] def ppHardSpace.formatter : Formatter := Formatter.pushWhitespace " "
@[combinator_formatter ppSpace, expose] def ppSpace.formatter : Formatter := Formatter.pushLine
@[combinator_formatter ppLine, expose] def ppLine.formatter : Formatter := Formatter.pushWhitespace "\n"
@[combinator_formatter ppRealFill, expose] def ppRealFill.formatter (p : Formatter) : Formatter := Formatter.fill p
@[combinator_formatter ppRealGroup, expose] def ppRealGroup.formatter (p : Formatter) : Formatter := Formatter.group p
@[combinator_formatter ppIndent, expose] def ppIndent.formatter (p : Formatter) : Formatter := Formatter.indent p
@[combinator_formatter ppDedent, expose] def ppDedent.formatter (p : Formatter) : Formatter := do
let opts ← getOptions
Formatter.indent p (some ((0:Int) - Std.Format.getIndent opts))
@[combinator_formatter ppAllowUngrouped, expose] def ppAllowUngrouped.formatter : Formatter := do
modify ({ · with mustBeGrouped := false })
@[combinator_formatter ppDedentIfGrouped, expose] def ppDedentIfGrouped.formatter (p : Formatter) : Formatter := do
Formatter.concat p
let indent := Std.Format.getIndent (← getOptions)
unless (← get).isUngrouped do
modify fun st => { st with stack := st.stack.modify (st.stack.size - 1) (·.nest (0 - indent)) }
@[combinator_formatter ppHardLineUnlessUngrouped, expose] def ppHardLineUnlessUngrouped.formatter : Formatter := do
if (← get).isUngrouped then
Formatter.pushLine
else
ppLine.formatter
end
namespace Parser
-- now synthesize parenthesizers
attribute [run_builtin_parser_attribute_hooks]
ppHardSpace ppSpace ppLine ppGroup ppRealGroup ppRealFill ppIndent ppDedent
ppAllowUngrouped ppDedentIfGrouped ppHardLineUnlessUngrouped
-- workaround: we want `ppSpace` below to refer to the built-in parser alias, not the def above that
-- would require `meta` access
end Parser
syntax "register_parser_alias " group("(" &"kind" " := " term ") ")? (str ppSpace)? ident (ppSpace colGt term)? : term
macro_rules
| `(register_parser_alias $[(kind := $kind?)]? $(aliasName?)? $declName $(info?)?) => do
let [(fullDeclName, [])] ← Macro.resolveGlobalName declName.getId |
Macro.throwError "expected non-overloaded constant name"
let aliasName := match aliasName? with
| some n => quote (Name.mkSimple n.getString)
| none => quote declName.getId
`(do Parser.registerAlias $aliasName ``$declName $declName $(info?.getD (Unhygienic.run `({}))) (kind? := some $(kind?.getD (quote fullDeclName)))
PrettyPrinter.Formatter.registerAlias $aliasName $(mkIdentFrom declName (declName.getId ++ `formatter))
PrettyPrinter.Parenthesizer.registerAlias $aliasName $(mkIdentFrom declName (declName.getId ++ `parenthesizer)))
open Parser
builtin_initialize
register_parser_alias patternIgnore { autoGroupArgs := false }
register_parser_alias group { autoGroupArgs := false }
register_parser_alias ppHardSpace { stackSz? := some 0 }
register_parser_alias ppSpace { stackSz? := some 0 }
register_parser_alias ppLine { stackSz? := some 0 }
register_parser_alias ppGroup { stackSz? := none }
register_parser_alias ppRealGroup { stackSz? := none }
register_parser_alias ppRealFill { stackSz? := none }
register_parser_alias ppIndent { stackSz? := none }
register_parser_alias ppDedent { stackSz? := none }
register_parser_alias ppDedentIfGrouped { stackSz? := none }
register_parser_alias ppAllowUngrouped { stackSz? := some 0 }
register_parser_alias ppHardLineUnlessUngrouped { stackSz? := some 0 }
end Lean