/-
Copyright (c) 2019 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura, Sebastian Ullrich
-/
module

prelude
public import Lean.PrettyPrinter.Formatter
public import Lean.PrettyPrinter.Parenthesizer
meta import Lean.Hygiene  -- for `run_builtin_parser_attribute_hooks`
import all Lean.Parser.Types
import all Lean.Parser.Basic
import all Lean.Parser.Extension

public section

namespace Lean
namespace Parser

-- synthesize pretty printers for parsers declared prior to `Lean.PrettyPrinter`
-- (because `Parser.Extension` depends on them)
attribute [run_builtin_parser_attribute_hooks]
  leadingNode termParser commandParser mkAntiquot nodeWithAntiquot sepBy sepBy1
  unicodeSymbol nonReservedSymbol
  withCache withResetCache withPosition withPositionAfterLinebreak withoutPosition
  withForbidden withoutForbidden setExpected
  incQuotDepth decQuotDepth suppressInsideQuot evalInsideQuot
  withOpen withOpenDecl
  dbgTraceState

/--
The parser `optional(p)`, or `(p)?`, parses `p` if it succeeds,
otherwise it succeeds with no value.

Note that because `?` is a legal identifier character, one must write `(p)?` or `p ?` for
it to parse correctly. `ident?` will not work; one must write `(ident)?` instead.

This parser has arity 1: it produces a `nullKind` node containing either zero arguments
(for the `none` case) or the list of arguments produced by `p`.
(In particular, if `p` has arity 0 then the two cases are not differentiated!) -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def optional (p : Parser) : Parser :=
  optionalNoAntiquot (withAntiquotSpliceAndSuffix `optional p (symbol "?"))

/--
The parser `many(p)`, or `p*`, repeats `p` until it fails, and returns the list of results.

The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be
automatically replaced by `group(p)` to ensure that it produces exactly 1 value.

This parser has arity 1: it produces a `nullKind` node containing one argument for each
invocation of `p` (or `group(p)`). -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def many (p : Parser) : Parser :=
  manyNoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*"))

/--
The parser `many1(p)`, or `p+`, repeats `p` until it fails, and returns the list of results.
`p` must succeed at least once, or this parser will fail.

Note that this parser produces the same parse tree as the `many(p)` / `p*` combinator,
and one matches both `p*` and `p+` using `$[ .. ]*` syntax in a syntax match.
(There is no `$[ .. ]+` syntax.)

The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be
automatically replaced by `group(p)` to ensure that it produces exactly 1 value.

This parser has arity 1: it produces a `nullKind` node containing one argument for each
invocation of `p` (or `group(p)`). -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def many1 (p : Parser) : Parser :=
  -- The splice kind `many` and the `*` suffix are the same ones `many` uses; this matches the
  -- docstring above (both `p*` and `p+` are matched with `$[ .. ]*`).
  many1NoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*"))

/--
The parser `ident` parses a single identifier, possibly with namespaces, such as `foo` or
`bar.baz`. The identifier must not be a declared token, so for example it will not match
`"def"` because `def` is a keyword token. Tokens are implicitly declared by using them in
string literals in parser declarations, so `syntax foo := "bla"` will make `bla` no longer
legal as an identifier.

Identifiers can contain special characters or keywords if they are escaped using the `«»`
characters: `«def»` is an identifier named `def`, and `«x»` is treated the same as `x`. This
is useful for using disallowed characters in identifiers such as `«foo.bar».baz` or
`«hello world»`.

This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier.
You can use `TSyntax.getId` to extract the name from the resulting syntax object.
-/
@[run_builtin_parser_attribute_hooks, builtin_doc] def ident : Parser :=
  withAntiquot (mkAntiquot "ident" identKind) identNoAntiquot

-- `optional (checkNoWsBefore >> "." >> checkNoWsBefore >> ident)`
-- can never fully succeed but ensures that the identifier
-- produces a partial syntax that contains the dot.
-- The partial syntax is sometimes useful for dot-auto-completion.
@[run_builtin_parser_attribute_hooks, builtin_doc] def identWithPartialTrailingDot :=
  ident >> optional (checkNoWsBefore >> "." >> checkNoWsBefore >> ident)

-- `ident` and `rawIdent` produce the same syntax tree, so we reuse the antiquotation kind name
@[run_builtin_parser_attribute_hooks, builtin_doc] def rawIdent : Parser :=
  withAntiquot (mkAntiquot "ident" identKind) rawIdentNoAntiquot

/--
The parser `hygieneInfo` parses no text, but creates a `hygieneInfoKind` node
containing an anonymous identifier as if it were parsed at the current position.
This identifier is modified by syntax quotations to add macro scopes like a regular identifier.

This is used to implement `have := ...` syntax: the `hygieneInfo` between the `have` and `:=`
collects macro scopes, which we can apply to `this` when expanding to `have this := ...`.
See [the language reference](lean-manual://section/macro-hygiene) for more information about
macro hygiene.

This is also used to implement cdot functions such as `(1 + ·)`. The opening parenthesis contains
a `hygieneInfo` node as does the cdot, which lets cdot expansion hygienically associate
parentheses to cdots.

This parser has arity 1: it produces a `hygieneInfoKind` node containing an anonymous
`Syntax.ident`. You can use `HygieneInfo.mkIdent` to create an `Ident` from the syntax object,
but you can also use `TSyntax.getHygieneInfo` to get the raw name from the identifier.
-/
@[run_builtin_parser_attribute_hooks, builtin_doc] def hygieneInfo : Parser :=
  withAntiquot (mkAntiquot "hygieneInfo" hygieneInfoKind (anonymous := false)) hygieneInfoNoAntiquot

/--
The parser `num` parses a numeric literal in several bases:

* Decimal: `129`
* Hexadecimal: `0xdeadbeef`
* Octal: `0o755`
* Binary: `0b1101`

This parser has arity 1: it produces a `numLitKind` node containing an atom with the text of the
literal.
You can use `TSyntax.getNat` to extract the number from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def numLit : Parser :=
  withAntiquot (mkAntiquot "num" numLitKind) numLitNoAntiquot

/--
The parser `hexnum` parses a hexadecimal numeric literal not containing the `0x` prefix.

It produces a `hexnumKind` node containing an atom with the text of the literal.
This parser is mainly used for creating atoms such as `#<hexnum>`. Recall that `hexnum` is not a
token and this parser must be prefixed by another parser.

For numerals such as `0xadef100a`, you should use `numLit`.
-/
@[run_builtin_parser_attribute_hooks, builtin_doc] def hexnum : Parser :=
  withAntiquot (mkAntiquot "hexnum" hexnumKind) hexnumNoAntiquot

/--
The parser `scientific` parses a scientific-notation literal, such as `1.3e-24`.

This parser has arity 1: it produces a `scientificLitKind` node containing an atom with the text
of the literal.
You can use `TSyntax.getScientific` to extract the parts from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def scientificLit : Parser :=
  withAntiquot (mkAntiquot "scientific" scientificLitKind) scientificLitNoAntiquot

/--
The parser `str` parses a string literal, such as `"foo"` or `"\r\n"`. Strings can contain
C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode characters like
`∈`. Newlines in a string are interpreted literally.

This parser has arity 1: it produces a `strLitKind` node containing an atom with the raw
literal (including the quote marks and without interpreting the escapes).
You can use `TSyntax.getString` to decode the string from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def strLit : Parser :=
  withAntiquot (mkAntiquot "str" strLitKind) strLitNoAntiquot

/--
The parser `char` parses a character literal, such as `'a'` or `'\n'`. Character literals can
contain C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode
characters like `∈`, but must evaluate to a single unicode codepoint, so `'♥'` is allowed but
`'❤️'` is not (since it is two codepoints but one grapheme cluster).

This parser has arity 1: it produces a `charLitKind` node containing an atom with the raw
literal (including the quote marks and without interpreting the escapes).
You can use `TSyntax.getChar` to decode the string from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def charLit : Parser :=
  withAntiquot (mkAntiquot "char" charLitKind) charLitNoAntiquot

/--
The parser `name` parses a name literal like `` `foo``. The syntax is the same as for identifiers
(see `ident`) but with a leading backquote.

This parser has arity 1: it produces a `nameLitKind` node containing the raw literal
(including the backquote).
You can use `TSyntax.getName` to extract the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks, builtin_doc] def nameLit : Parser :=
  withAntiquot (mkAntiquot "name" nameLitKind) nameLitNoAntiquot

/--
The parser `group(p)` parses the same thing as `p`, but it wraps the results in a `groupKind`
node.

This parser always has arity 1, even if `p` does not. Parsers like `p*` are automatically
rewritten to `group(p)*` if `p` does not have arity 1, so that the results from separate
invocations of `p` can be differentiated. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline] def group (p : Parser) : Parser :=
  node groupKind p

/--
The parser `many1Indent(p)` is equivalent to `withPosition((colGe p)+)`. This has the effect of
parsing one or more occurrences of `p`, where each subsequent `p` parse needs to be indented
the same or more than the first parse.

This parser has arity 1, and returns a list of the results from `p`.
`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline] def many1Indent (p : Parser) : Parser :=
  withPosition $ many1 (checkColGe "irrelevant" >> p)

/--
The parser `manyIndent(p)` is equivalent to `withPosition((colGe p)*)`. This has the effect of
parsing zero or more occurrences of `p`, where each subsequent `p` parse needs to be indented
the same or more than the first parse.

This parser has arity 1, and returns a list of the results from `p`.
`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline] def manyIndent (p : Parser) : Parser :=
  withPosition $ many (checkColGe "irrelevant" >> p)

-- `sepBy` under `withPosition`: every element is guarded by `checkColGe`, and a separator is
-- either `psep` or, failing that, a linebreak followed by a token at exactly the saved column
-- (`checkColEq >> checkLinebreakBefore`), in which case `pushNone` stands in for the separator.
@[builtin_doc, inline] def sepByIndent (p : Parser)
    (sep : String) (psep : Parser := symbol sep) (allowTrailingSep : Bool := false) : Parser :=
  let p := withAntiquotSpliceAndSuffix `sepBy p (symbol "*")
  withPosition $
    sepBy (checkColGe "irrelevant" >> p) sep
      (psep <|> checkColEq "irrelevant" >> checkLinebreakBefore >> pushNone) allowTrailingSep

-- Same construction as `sepByIndent`, but built on `sepBy1` instead of `sepBy`.
@[builtin_doc, inline] def sepBy1Indent (p : Parser)
    (sep : String) (psep : Parser := symbol sep) (allowTrailingSep : Bool := false) : Parser :=
  let p := withAntiquotSpliceAndSuffix `sepBy p (symbol "*")
  withPosition $
    sepBy1 (checkColGe "irrelevant" >> p) sep
      (psep <|> checkColEq "irrelevant" >> checkLinebreakBefore >> pushNone) allowTrailingSep

open PrettyPrinter Syntax.MonadTraverser Formatter in
@[combinator_formatter sepByIndent, expose]
def sepByIndent.formatter (p : Formatter) (_sep : String) (pSep : Formatter) : Formatter := do
  let stx ← getCur
  -- A newline separator is an empty null node at an odd (separator) index other than the last one.
  let hasNewlineSep := stx.getArgs.mapIdx (fun i n =>
    i % 2 == 1 && n.matchesNull 0 && i != stx.getArgs.size - 1) |>.any id
  visitArgs do
    -- Arguments are visited from last to first; even indices are elements, odd ones separators.
    for i in (List.range stx.getArgs.size).reverse do
      if i % 2 == 0 then p else pSep <|>
        -- If the final separator is a newline, skip it.
        ((if i == stx.getArgs.size - 1 then pure () else pushWhitespace "\n") *> goLeft)
  -- If there is any newline separator, then we add an `align` at the start
  -- so that `withPosition` will pick up the right column.
  if hasNewlineSep then
    pushAlign (force := true)

@[combinator_formatter sepBy1Indent, expose] def sepBy1Indent.formatter := sepByIndent.formatter

attribute [run_builtin_parser_attribute_hooks] sepByIndent sepBy1Indent

-- `notSymbol s` is `notFollowedBy (symbol s) s`; the string `s` is passed both as the symbol to
-- reject and as the second argument of `notFollowedBy`.
@[run_builtin_parser_attribute_hooks, builtin_doc] abbrev notSymbol (s : String) : Parser :=
  notFollowedBy (symbol s) s

/-- No-op parser combinator that annotates subtrees to be ignored in syntax patterns. -/
@[run_builtin_parser_attribute_hooks, builtin_doc, inline] def patternIgnore : Parser → Parser :=
  node `patternIgnore

/-- No-op parser that advises the pretty printer to emit a non-breaking space. -/
@[builtin_doc, inline] def ppHardSpace : Parser := skip
/-- No-op parser that advises the pretty printer to emit a space/soft line break. -/
@[builtin_doc, inline] def ppSpace : Parser := skip
/-- No-op parser that advises the pretty printer to emit a hard line break. -/
@[builtin_doc, inline] def ppLine : Parser := skip
/-- No-op parser combinator that advises the pretty printer to emit a `Format.fill` node. -/
@[builtin_doc, inline] def ppRealFill : Parser → Parser := id
/-- No-op parser combinator that advises the pretty printer to emit a `Format.group` node. -/
@[builtin_doc, inline] def ppRealGroup : Parser → Parser := id
/-- No-op parser combinator that advises the pretty printer to indent the given syntax without grouping it. -/
@[builtin_doc, inline] def ppIndent : Parser → Parser := id
/--
No-op parser combinator that advises the pretty printer to group and indent the given syntax.
By default, only syntax categories are grouped. -/
@[builtin_doc, inline] def ppGroup (p : Parser) : Parser := ppRealFill (ppIndent p)
/--
No-op parser combinator that advises the pretty printer to dedent the given syntax.
Dedenting can in particular be used to counteract automatic indentation. -/
@[builtin_doc, inline] def ppDedent : Parser → Parser := id

/--
No-op parser combinator that allows the pretty printer to omit the group and
indent operation in the enclosing category parser.
```
syntax ppAllowUngrouped "by " tacticSeq : term
-- allows a `by` after `:=` without linebreak in between:
theorem foo : True := by
  trivial
```
-/
@[builtin_doc, inline] def ppAllowUngrouped : Parser := skip

/--
No-op parser combinator that advises the pretty printer to dedent the given syntax,
if it was grouped by the category parser.
Dedenting can in particular be used to counteract automatic indentation. -/
@[builtin_doc, inline] def ppDedentIfGrouped : Parser → Parser := id

/--
No-op parser combinator that prints a line break.
The line break is soft if the combinator is followed
by an ungrouped parser (see ppAllowUngrouped), otherwise hard. -/
@[builtin_doc, inline] def ppHardLineUnlessUngrouped : Parser := skip

end Parser

section
open PrettyPrinter Parser

@[combinator_formatter ppHardSpace, expose]
def ppHardSpace.formatter : Formatter := Formatter.pushWhitespace " "
@[combinator_formatter ppSpace, expose]
def ppSpace.formatter : Formatter := Formatter.pushLine
@[combinator_formatter ppLine, expose]
def ppLine.formatter : Formatter := Formatter.pushWhitespace "\n"
@[combinator_formatter ppRealFill, expose]
def ppRealFill.formatter (p : Formatter) : Formatter := Formatter.fill p
@[combinator_formatter ppRealGroup, expose]
def ppRealGroup.formatter (p : Formatter) : Formatter := Formatter.group p
@[combinator_formatter ppIndent, expose]
def ppIndent.formatter (p : Formatter) : Formatter := Formatter.indent p
@[combinator_formatter ppDedent, expose]
def ppDedent.formatter (p : Formatter) : Formatter := do
  let opts ← getOptions
  -- Indent by the negated configured indentation width.
  Formatter.indent p (some ((0:Int) - Std.Format.getIndent opts))

@[combinator_formatter ppAllowUngrouped, expose]
def ppAllowUngrouped.formatter : Formatter := do
  modify ({ · with mustBeGrouped := false })
@[combinator_formatter ppDedentIfGrouped, expose]
def ppDedentIfGrouped.formatter (p : Formatter) : Formatter := do
  Formatter.concat p
  let indent := Std.Format.getIndent (← getOptions)
  unless (← get).isUngrouped do
    -- Re-nest the entry at the last index of the formatter stack by the negated indentation width.
    modify fun st => { st with stack := st.stack.modify (st.stack.size - 1) (·.nest (0 - indent)) }
@[combinator_formatter ppHardLineUnlessUngrouped, expose]
def ppHardLineUnlessUngrouped.formatter : Formatter := do
  if (← get).isUngrouped then
    Formatter.pushLine
  else
    ppLine.formatter

end

namespace Parser

-- now synthesize parenthesizers
attribute [run_builtin_parser_attribute_hooks]
  ppHardSpace ppSpace ppLine ppGroup ppRealGroup ppRealFill ppIndent ppDedent
  ppAllowUngrouped ppDedentIfGrouped ppHardLineUnlessUngrouped

-- workaround: we want `ppSpace` below to refer to the built-in parser alias, not the def above that
-- would require `meta` access
end Parser

syntax "register_parser_alias " group("(" &"kind" " := " term ") ")? (str ppSpace)? ident (ppSpace colGt term)? : term
macro_rules
  | `(register_parser_alias $[(kind := $kind?)]? $(aliasName?)? $declName $(info?)?) => do
    let [(fullDeclName, [])] ← Macro.resolveGlobalName declName.getId |
      Macro.throwError "expected non-overloaded constant name"
    -- The alias is the explicit string literal if one was given, otherwise the declaration name
    -- exactly as the user wrote it.
    let aliasName := match aliasName? with
      | some n => quote (Name.mkSimple n.getString)
      | none => quote declName.getId
    -- Register the parser together with its `.formatter` and `.parenthesizer` companions; the
    -- companion identifiers are built by appending to the name as written by the user.
    `(do Parser.registerAlias $aliasName ``$declName $declName $(info?.getD (Unhygienic.run `({}))) (kind? := some $(kind?.getD (quote fullDeclName)))
         PrettyPrinter.Formatter.registerAlias $aliasName $(mkIdentFrom declName (declName.getId ++ `formatter))
         PrettyPrinter.Parenthesizer.registerAlias $aliasName $(mkIdentFrom declName (declName.getId ++ `parenthesizer)))

open Parser

builtin_initialize
  register_parser_alias patternIgnore { autoGroupArgs := false }

  register_parser_alias group { autoGroupArgs := false }
  register_parser_alias ppHardSpace { stackSz? := some 0 }
  register_parser_alias ppSpace { stackSz? := some 0 }
  register_parser_alias ppLine { stackSz? := some 0 }
  register_parser_alias ppGroup { stackSz? := none }
  register_parser_alias ppRealGroup { stackSz? := none }
  register_parser_alias ppRealFill { stackSz? := none }
  register_parser_alias ppIndent { stackSz? := none }
  register_parser_alias ppDedent { stackSz? := none }
  register_parser_alias ppDedentIfGrouped { stackSz? := none }
  register_parser_alias ppAllowUngrouped { stackSz? := some 0 }
  register_parser_alias ppHardLineUnlessUngrouped { stackSz? := some 0 }

end Lean