353 lines
18 KiB
Text
353 lines
18 KiB
Text
/-
|
|
Copyright (c) 2019 Microsoft Corporation. All rights reserved.
|
|
Released under Apache 2.0 license as described in the file LICENSE.
|
|
Authors: Leonardo de Moura, Sebastian Ullrich
|
|
-/
|
|
module
|
|
|
|
prelude
|
|
public import Lean.PrettyPrinter.Formatter
|
|
public import Lean.PrettyPrinter.Parenthesizer
|
|
meta import Lean.Hygiene
|
|
-- for `run_builtin_parser_attribute_hooks`
|
|
import all Lean.Parser.Types
|
|
import all Lean.Parser.Basic
|
|
import all Lean.Parser.Extension
|
|
|
|
public section
|
|
|
|
namespace Lean
|
|
namespace Parser
|
|
|
|
-- The parsers listed here were declared before `Lean.PrettyPrinter` existed
-- (`Parser.Extension` depends on them), so their pretty printers are synthesized now.
attribute [run_builtin_parser_attribute_hooks]
  leadingNode termParser commandParser mkAntiquot nodeWithAntiquot
  sepBy sepBy1
  unicodeSymbol nonReservedSymbol
  withCache withResetCache
  withPosition withPositionAfterLinebreak withoutPosition
  withForbidden withoutForbidden setExpected
  incQuotDepth decQuotDepth suppressInsideQuot evalInsideQuot
  withOpen withOpenDecl
  dbgTraceState
|
|
|
|
/-- `optional(p)`, also written `(p)?`, runs `p` and keeps its result when `p` succeeds;
when `p` fails, this parser still succeeds, but with no value.

Since `?` may legally occur inside identifiers, the suffix form must be spelled `(p)?` or
`p ?`. For example `ident?` does not work; write `(ident)?` instead.

Arity 1: the result is a `nullKind` node holding either zero arguments (the `none` case)
or the list of arguments produced by `p`.
(Consequently the two cases cannot be told apart when `p` itself has arity 0!) -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def optional (p : Parser) : Parser :=
  optionalNoAntiquot <| withAntiquotSpliceAndSuffix `optional p (symbol "?")
|
|
|
|
/-- `many(p)`, also written `p*`, runs `p` repeatedly until it fails and returns the list of
results.

The argument `p` is "auto-grouped": when its arity is greater than 1 it is automatically
replaced by `group(p)`, so that each repetition contributes exactly 1 value.

Arity 1: the result is a `nullKind` node with one argument per invocation of `p`
(or of `group(p)`). -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def many (p : Parser) : Parser :=
  manyNoAntiquot <| withAntiquotSpliceAndSuffix `many p (symbol "*")
|
|
|
|
/-- `many1(p)`, also written `p+`, runs `p` repeatedly until it fails and returns the list of
results. At least one run of `p` must succeed, otherwise this parser fails.

The parse tree is the same as the one produced by `many(p)` / `p*`, and a syntax match
handles both `p*` and `p+` with the `$[ .. ]*` form.
(There is no `$[ .. ]+` syntax.)

The argument `p` is "auto-grouped": when its arity is greater than 1 it is automatically
replaced by `group(p)`, so that each repetition contributes exactly 1 value.

Arity 1: the result is a `nullKind` node with one argument per invocation of `p`
(or of `group(p)`). -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def many1 (p : Parser) : Parser :=
  -- The splice deliberately uses the `many` kind and the `*` suffix, matching `many` above.
  many1NoAntiquot <| withAntiquotSpliceAndSuffix `many p (symbol "*")
|
|
|
|
/-- `ident` parses one identifier, optionally namespaced, e.g. `foo` or `bar.baz`.
A declared token is never accepted as an identifier: `"def"` is not matched because `def` is a
keyword token. Using a string literal in a parser declaration implicitly declares it as a
token, so after `syntax foo := "bla"` the word `bla` is no longer a legal identifier.

Escaping with the `«»` characters lets an identifier contain special characters or keywords:
`«def»` is an identifier named `def`, while `«x»` is treated the same as `x`. This makes
identifiers with otherwise disallowed characters possible, such as `«foo.bar».baz` or
`«hello world»`.

Arity 1: the result is a `Syntax.ident` node containing the parsed identifier.
`TSyntax.getId` extracts the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def ident : Parser :=
  withAntiquot (mkAntiquot "ident" identKind) identNoAntiquot
|
|
|
|
-- The trailing `optional (checkNoWsBefore >> "." >> checkNoWsBefore >> ident)`
-- can never fully succeed. Its purpose is to make the identifier yield
-- a partial syntax that still contains the dot,
-- which is sometimes useful for dot-auto-completion.
@[run_builtin_parser_attribute_hooks, builtin_doc]
def identWithPartialTrailingDot :=
  ident >> optional (checkNoWsBefore >> "." >> checkNoWsBefore >> ident)
|
|
|
|
-- The antiquotation kind name "ident" is reused on purpose:
-- `rawIdent` and `ident` produce the same syntax tree.
@[run_builtin_parser_attribute_hooks, builtin_doc]
def rawIdent : Parser :=
  withAntiquot (mkAntiquot "ident" identKind) rawIdentNoAntiquot
|
|
|
|
/--
`hygieneInfo` consumes no text. It creates a `hygieneInfoKind` node that contains an
anonymous identifier, as if that identifier had been parsed at the current position.
Syntax quotations add macro scopes to this identifier just as they do for a regular one.

One use is the `have := ...` syntax: the `hygieneInfo` sitting between `have` and `:=`
collects macro scopes, and these are applied to `this` when expanding to
`have this := ...`.
See [the language reference](lean-manual://section/macro-hygiene) for more information about
macro hygiene.

Another use is cdot functions such as `(1 + ·)`. Both the opening parenthesis and the cdot
carry a `hygieneInfo` node, which lets cdot expansion hygienically associate parentheses to
cdots.

Arity 1: the result is a `hygieneInfoKind` node containing an anonymous `Syntax.ident`.
`HygieneInfo.mkIdent` turns the syntax object into an `Ident`;
alternatively `TSyntax.getHygieneInfo` returns the raw name from the identifier. -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def hygieneInfo : Parser :=
  withAntiquot
    (mkAntiquot "hygieneInfo" hygieneInfoKind (anonymous := false))
    hygieneInfoNoAntiquot
|
|
|
|
/-- `num` parses a numeric literal written in one of several bases:

* Decimal: `129`
* Hexadecimal: `0xdeadbeef`
* Octal: `0o755`
* Binary: `0b1101`

Arity 1: the result is a `numLitKind` node containing an atom with the text of the literal.
`TSyntax.getNat` extracts the number from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def numLit : Parser :=
  withAntiquot (mkAntiquot "num" numLitKind) numLitNoAntiquot
|
|
|
|
/-- The parser `hexnum` parses a hexadecimal numeric literal not containing the `0x` prefix.

It produces a `hexnumKind` node containing an atom with the text of the
literal. This parser is mainly used for creating atoms such as `#<hexnum>`. Recall that `hexnum`
is not a token and this parser must be prefixed by another parser.

For numerals such as `0xadef100a`, you should use `numLit`.
-/
-- `run_builtin_parser_attribute_hooks` is applied here exactly as for the other literal
-- parsers in this file (`numLit`, `scientificLit`, `strLit`, `charLit`, `nameLit`), so that
-- the pretty-printer hooks are run for `hexnum` as well.
@[run_builtin_parser_attribute_hooks, builtin_doc]
def hexnum : Parser :=
  withAntiquot (mkAntiquot "hexnum" hexnumKind) hexnumNoAntiquot
|
|
|
|
/-- `scientific` parses a literal in scientific notation, for example `1.3e-24`.

Arity 1: the result is a `scientificLitKind` node containing an atom with the text of the
literal.
`TSyntax.getScientific` extracts the parts from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def scientificLit : Parser :=
  withAntiquot (mkAntiquot "scientific" scientificLitKind) scientificLitNoAntiquot
|
|
|
|
/-- `str` parses a string literal, for example `"foo"` or `"\r\n"`. A string may contain
C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, and also literal unicode characters like
`∈`. Newlines in a string are interpreted literally.

Arity 1: the result is a `strLitKind` node containing an atom with the raw literal
(the quote marks are included and the escapes are not interpreted).
`TSyntax.getString` decodes the string from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def strLit : Parser :=
  withAntiquot (mkAntiquot "str" strLitKind) strLitNoAntiquot
|
|
|
|
/-- `char` parses a character literal, for example `'a'` or `'\n'`. A character literal may
contain C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, and also literal unicode
characters like `∈`. It must evaluate to a single unicode codepoint, so `'♥'` is allowed but
`'❤️'` is not (since it is two codepoints but one grapheme cluster).

Arity 1: the result is a `charLitKind` node containing an atom with the raw literal
(the quote marks are included and the escapes are not interpreted).
`TSyntax.getChar` decodes the character from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def charLit : Parser :=
  withAntiquot (mkAntiquot "char" charLitKind) charLitNoAntiquot
|
|
|
|
/-- `name` parses a name literal such as `` `foo``. The syntax is that of identifiers
(see `ident`), preceded by a backquote.

Arity 1: the result is a `nameLitKind` node containing the raw literal
(the backquote is included).
`TSyntax.getName` extracts the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc]
def nameLit : Parser :=
  withAntiquot (mkAntiquot "name" nameLitKind) nameLitNoAntiquot
|
|
|
|
/-- `group(p)` accepts exactly what `p` accepts, and wraps the results in a `groupKind` node.

The arity of this parser is always 1, regardless of the arity of `p`. A parser such as `p*` is
rewritten automatically to `group(p)*` when `p` does not have arity 1, which keeps the results
of separate invocations of `p` distinguishable. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline]
def group (p : Parser) : Parser :=
  node groupKind p
|
|
|
|
/-- `many1Indent(p)` is equivalent to `withPosition((colGe p)+)`: it parses one or more
occurrences of `p`, and every subsequent `p` parse must be indented the same as or more than
the first parse.

Arity 1: the result is a list of the results from `p`.
`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline]
def many1Indent (p : Parser) : Parser :=
  withPosition (many1 (checkColGe "irrelevant" >> p))
|
|
|
|
/-- `manyIndent(p)` is equivalent to `withPosition((colGe p)*)`: it parses zero or more
occurrences of `p`, and every subsequent `p` parse must be indented the same as or more than
the first parse.

Arity 1: the result is a list of the results from `p`.
`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline]
def manyIndent (p : Parser) : Parser :=
  withPosition (many (checkColGe "irrelevant" >> p))
|
|
|
|
-- `sepByIndent` runs `sepBy` under `withPosition`, with every element guarded by `checkColGe`.
-- A separator is either `psep` (by default `symbol sep`) or, failing that, the sequence
-- `checkColEq >> checkLinebreakBefore >> pushNone`.
-- NOTE(review): presumably this lets a line break followed by an element at the saved column
-- stand in for the separator — confirm against the definitions of those checks.
@[builtin_doc, inline]
def sepByIndent (p : Parser) (sep : String) (psep : Parser := symbol sep) (allowTrailingSep : Bool := false) : Parser :=
  let p := withAntiquotSpliceAndSuffix `sepBy p (symbol "*")
  withPosition
    (sepBy (checkColGe "irrelevant" >> p) sep
      (psep <|> (checkColEq "irrelevant" >> checkLinebreakBefore >> pushNone))
      allowTrailingSep)
|
|
|
|
-- `sepBy1Indent` has the same shape as `sepByIndent` but is built on `sepBy1` instead of
-- `sepBy`. The antiquotation splice still uses the `sepBy` kind and the `*` suffix.
@[builtin_doc, inline]
def sepBy1Indent (p : Parser) (sep : String) (psep : Parser := symbol sep) (allowTrailingSep : Bool := false) : Parser :=
  let p := withAntiquotSpliceAndSuffix `sepBy p (symbol "*")
  withPosition
    (sepBy1 (checkColGe "irrelevant" >> p) sep
      (psep <|> (checkColEq "irrelevant" >> checkLinebreakBefore >> pushNone))
      allowTrailingSep)
|
|
|
|
open PrettyPrinter Syntax.MonadTraverser Formatter in
-- Formatter for `sepByIndent`. The arguments of the current node alternate between elements
-- (even indices, formatted with `p`) and separators (odd indices, formatted with `pSep`).
-- When `pSep` fails on a separator slot, a newline is emitted in its place.
@[combinator_formatter sepByIndent, expose]
def sepByIndent.formatter (p : Formatter) (_sep : String) (pSep : Formatter) : Formatter := do
  let args := (← getCur).getArgs
  let lastIdx := args.size - 1
  -- A separator slot that matches `null` with 0 arguments and is not the last argument
  -- counts as a newline separator.
  let hasNewlineSep := args.mapIdx (fun i arg =>
    i % 2 == 1 && arg.matchesNull 0 && i != lastIdx) |>.any id
  visitArgs do
    -- Indices are walked from the last argument down to the first.
    for i in (List.range args.size).reverse do
      if i % 2 == 0 then
        p
      else
        pSep <|>
          -- If the final separator is a newline, skip it.
          ((if i == lastIdx then pure () else pushWhitespace "\n") *> goLeft)
  -- If there is any newline separator, then we add an `align` at the start
  -- so that `withPosition` will pick up the right column.
  if hasNewlineSep then
    pushAlign (force := true)
|
|
|
|
-- `sepBy1Indent` is formatted exactly like `sepByIndent`.
@[combinator_formatter sepBy1Indent, expose]
def sepBy1Indent.formatter :=
  sepByIndent.formatter
|
|
|
|
-- The hooks are run only now, after the custom formatters above have been declared.
attribute [run_builtin_parser_attribute_hooks]
  sepByIndent sepBy1Indent
|
|
|
|
-- `notSymbol s` abbreviates `notFollowedBy (symbol s) s`; the string `s` is used both for the
-- symbol parser and as the second argument of `notFollowedBy`.
@[run_builtin_parser_attribute_hooks, builtin_doc]
abbrev notSymbol (s : String) : Parser :=
  notFollowedBy (symbol s) s
|
|
|
|
/-- Parser combinator without parsing effect of its own: it marks subtrees that syntax
patterns should ignore, by wrapping them in a `patternIgnore` node. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline]
def patternIgnore : Parser → Parser :=
  node `patternIgnore
|
|
|
|
/-- Parses nothing; tells the pretty printer to emit a non-breaking space. -/
@[builtin_doc, inline]
def ppHardSpace : Parser :=
  skip
|
|
/-- Parses nothing; tells the pretty printer to emit a space/soft line break. -/
@[builtin_doc, inline]
def ppSpace : Parser :=
  skip
|
|
/-- Parses nothing; tells the pretty printer to emit a hard line break. -/
@[builtin_doc, inline]
def ppLine : Parser :=
  skip
|
|
/-- Leaves the parser unchanged; tells the pretty printer to emit a `Format.fill` node. -/
@[builtin_doc, inline]
def ppRealFill : Parser → Parser :=
  id
|
|
/-- Leaves the parser unchanged; tells the pretty printer to emit a `Format.group` node. -/
@[builtin_doc, inline]
def ppRealGroup : Parser → Parser :=
  id
|
|
/-- Leaves the parser unchanged; tells the pretty printer to indent the given syntax without
grouping it. -/
@[builtin_doc, inline]
def ppIndent : Parser → Parser :=
  id
|
|
/--
Leaves the parser unchanged; tells the pretty printer to group and indent the given syntax.
By default, only syntax categories are grouped.

Defined as `ppRealFill` applied to `ppIndent p`. -/
@[builtin_doc, inline]
def ppGroup (p : Parser) : Parser :=
  ppRealFill <| ppIndent p
|
|
/--
Leaves the parser unchanged; tells the pretty printer to dedent the given syntax.
One particular use of dedenting is to counteract automatic indentation. -/
@[builtin_doc, inline]
def ppDedent : Parser → Parser :=
  id
|
|
|
|
/--
Parses nothing; permits the pretty printer to leave out the group and indent operation of
the enclosing category parser.
```
syntax ppAllowUngrouped "by " tacticSeq : term
-- allows a `by` after `:=` without linebreak in between:
theorem foo : True := by
  trivial
```
-/
@[builtin_doc, inline]
def ppAllowUngrouped : Parser :=
  skip
|
|
|
|
/--
Leaves the parser unchanged; tells the pretty printer to dedent the given syntax, but only
if it was grouped by the category parser.
One particular use of dedenting is to counteract automatic indentation. -/
@[builtin_doc, inline]
def ppDedentIfGrouped : Parser → Parser :=
  id
|
|
|
|
/--
Parses nothing; makes the pretty printer print a line break.
The break is soft when the combinator is followed by an ungrouped parser
(see ppAllowUngrouped), and hard otherwise. -/
@[builtin_doc, inline]
def ppHardLineUnlessUngrouped : Parser :=
  skip
|
|
|
|
end Parser
|
|
|
|
section
open PrettyPrinter Parser

-- `ppHardSpace`: push a single space as whitespace.
@[combinator_formatter ppHardSpace, expose]
def ppHardSpace.formatter : Formatter :=
  Formatter.pushWhitespace " "

-- `ppSpace`: push a line via `Formatter.pushLine`.
@[combinator_formatter ppSpace, expose]
def ppSpace.formatter : Formatter :=
  Formatter.pushLine

-- `ppLine`: push a newline character as whitespace.
@[combinator_formatter ppLine, expose]
def ppLine.formatter : Formatter :=
  Formatter.pushWhitespace "\n"

-- `ppRealFill`: run the inner formatter under `Formatter.fill`.
@[combinator_formatter ppRealFill, expose]
def ppRealFill.formatter (p : Formatter) : Formatter :=
  Formatter.fill p

-- `ppRealGroup`: run the inner formatter under `Formatter.group`.
@[combinator_formatter ppRealGroup, expose]
def ppRealGroup.formatter (p : Formatter) : Formatter :=
  Formatter.group p

-- `ppIndent`: run the inner formatter under `Formatter.indent` with its default amount.
@[combinator_formatter ppIndent, expose]
def ppIndent.formatter (p : Formatter) : Formatter :=
  Formatter.indent p

-- `ppDedent`: indent by the negated value of `Std.Format.getIndent` for the current options.
@[combinator_formatter ppDedent, expose]
def ppDedent.formatter (p : Formatter) : Formatter := do
  Formatter.indent p (some ((0:Int) - Std.Format.getIndent (← getOptions)))

-- `ppAllowUngrouped`: clear the `mustBeGrouped` flag in the formatter state.
@[combinator_formatter ppAllowUngrouped, expose]
def ppAllowUngrouped.formatter : Formatter := do
  modify fun st => { st with mustBeGrouped := false }

-- `ppDedentIfGrouped`: run `p` through `Formatter.concat`; afterwards, unless the state is
-- ungrouped, nest the last entry of the stack by the negated indent.
@[combinator_formatter ppDedentIfGrouped, expose]
def ppDedentIfGrouped.formatter (p : Formatter) : Formatter := do
  Formatter.concat p
  let indent := Std.Format.getIndent (← getOptions)
  let cur ← get
  unless cur.isUngrouped do
    modify fun s =>
      { s with stack := s.stack.modify (s.stack.size - 1) (·.nest (0 - indent)) }

-- `ppHardLineUnlessUngrouped`: same output as `ppSpace` when the state is ungrouped,
-- same output as `ppLine` otherwise.
@[combinator_formatter ppHardLineUnlessUngrouped, expose]
def ppHardLineUnlessUngrouped.formatter : Formatter := do
  let cur ← get
  if cur.isUngrouped then
    Formatter.pushLine
  else
    ppLine.formatter

end
|
|
|
|
namespace Parser
|
|
|
|
-- The formatters for the `pp*` parsers are declared by hand in this file;
-- now synthesize parenthesizers
attribute [run_builtin_parser_attribute_hooks]
  ppHardSpace ppSpace ppLine
  ppGroup ppRealGroup ppRealFill
  ppIndent ppDedent
  ppAllowUngrouped ppDedentIfGrouped ppHardLineUnlessUngrouped
|
|
|
|
-- workaround: we want `ppSpace` below to refer to the built-in parser alias, not the def above that
|
|
-- would require `meta` access
|
|
end Parser
|
|
|
|
-- `register_parser_alias (kind := k)? "alias"? decl info?` expands to a `do` block that
-- registers `decl` as a parser alias and registers `decl.formatter` / `decl.parenthesizer`
-- under the same alias name.
syntax "register_parser_alias " group("(" &"kind" " := " term ") ")? (str ppSpace)? ident (ppSpace colGt term)? : term
macro_rules
  | `(register_parser_alias $[(kind := $kind?)]? $(aliasName?)? $declName $(info?)?) => do
    -- Only a single resolution whose second component is empty is accepted.
    match ← Macro.resolveGlobalName declName.getId with
    | [(fullDeclName, [])] =>
      -- The alias name is the explicit string if one was given, else the identifier as written.
      let aliasName := match aliasName? with
        | none => quote declName.getId
        | some n => quote (Name.mkSimple n.getString)
      -- `info` defaults to `{}`, and `kind` defaults to the resolved declaration name.
      `(do Parser.registerAlias $aliasName ``$declName $declName $(info?.getD (Unhygienic.run `({}))) (kind? := some $(kind?.getD (quote fullDeclName)))
           PrettyPrinter.Formatter.registerAlias $aliasName $(mkIdentFrom declName (declName.getId ++ `formatter))
           PrettyPrinter.Parenthesizer.registerAlias $aliasName $(mkIdentFrom declName (declName.getId ++ `parenthesizer)))
    | _ =>
      Macro.throwError "expected non-overloaded constant name"
|
|
|
|
open Parser
|
|
|
|
builtin_initialize
  -- Node-wrapping combinators: arguments are not auto-grouped.
  register_parser_alias patternIgnore { autoGroupArgs := false }

  register_parser_alias group { autoGroupArgs := false }
  -- Pretty-printer hints defined as `skip`: registered with `stackSz? := some 0`.
  register_parser_alias ppHardSpace { stackSz? := some 0 }
  register_parser_alias ppSpace { stackSz? := some 0 }
  register_parser_alias ppLine { stackSz? := some 0 }
  -- Pretty-printer hints that wrap another parser: registered with `stackSz? := none`.
  register_parser_alias ppGroup { stackSz? := none }
  register_parser_alias ppRealGroup { stackSz? := none }
  register_parser_alias ppRealFill { stackSz? := none }
  register_parser_alias ppIndent { stackSz? := none }
  register_parser_alias ppDedent { stackSz? := none }
  register_parser_alias ppDedentIfGrouped { stackSz? := none }
  -- The remaining two are again `skip`-based hints.
  register_parser_alias ppAllowUngrouped { stackSz? := some 0 }
  register_parser_alias ppHardLineUnlessUngrouped { stackSz? := some 0 }
|
|
|
|
end Lean
|