diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 8ba1bef32d..5cf447b65c 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -92,10 +92,15 @@ def andthenFn (p q : ParserFn) : ParserFn := fun c s => firstTokens := p.firstTokens.seq q.firstTokens } -def andthen (p q : Parser) : Parser := { - info := andthenInfo p.info q.info, +/-- The `andthen(p, q)` combinator, usually written as adjacency in syntax declarations (`p q`), +parses `p` followed by `q`. + +The arity of this parser is the sum of the arities of `p` and `q`: +that is, it accumulates all the nodes produced by `p` followed by the nodes from `q` into the list +of arguments to the surrounding parse node. -/ +def andthen (p q : Parser) : Parser where + info := andthenInfo p.info q.info fn := andthenFn p.fn q.fn -} instance : AndThen Parser where andThen a b := andthen a (b ()) @@ -265,10 +270,9 @@ def orelseFn (p q : ParserFn) : ParserFn := NOTE: In order for the pretty printer to retrace an `orelse`, `p` must be a call to `node` or some other parser producing a single node kind. Nested `orelse` calls are flattened for this, i.e. `(node k1 p1 <|> node k2 p2) <|> ...` is fine as well. -/ -def orelse (p q : Parser) : Parser := { +def orelse (p q : Parser) : Parser where info := orelseInfo p.info q.info fn := orelseFn p.fn q.fn -} instance : OrElse Parser where orElse a b := orelse a (b ()) @@ -284,6 +288,12 @@ def atomicFn (p : ParserFn) : ParserFn := fun c s => | ⟨stack, lhsPrec, _, cache, some msg⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg⟩ | other => other +/-- The `atomic(p)` parser parses `p`, returns the same result as `p` and fails iff `p` fails, +but if `p` fails after consuming some tokens `atomic(p)` will fail without consuming tokens. +This is important for the `p <|> q` combinator, because it is not backtracking, and will fail if +`p` fails after consuming some tokens. To get backtracking behavior, use `atomic(p) <|> q` instead. + +This parser has the same arity as `p` - it produces the same result as `p`. -/ def atomic : Parser → Parser := withFn atomicFn def optionalFn (p : ParserFn) : ParserFn := fun c s => @@ -310,6 +320,11 @@ def lookaheadFn (p : ParserFn) : ParserFn := fun c s => let s := p c s if s.hasError then s else s.restore iniSz iniPos +/-- `lookahead(p)` runs `p` and fails if `p` does, but it produces no parse nodes and rewinds the +position to the original state on success. So for example `lookahead("=>")` will ensure that the +next token is `"=>"`, without actually consuming this token. + +This parser has arity 0 - it does not capture anything. -/ def lookahead : Parser → Parser := withFn lookaheadFn def notFollowedByFn (p : ParserFn) (msg : String) : ParserFn := fun c s => @@ -322,9 +337,12 @@ def notFollowedByFn (p : ParserFn) (msg : String) : ParserFn := fun c s => let s := s.restore iniSz iniPos s.mkUnexpectedError s!"unexpected {msg}" -def notFollowedBy (p : Parser) (msg : String) : Parser := { +/-- `notFollowedBy(p, "foo")` succeeds iff `p` fails; +if `p` succeeds then it fails with the message `"unexpected foo"`. + +This parser has arity 0 - it does not capture anything. -/ +def notFollowedBy (p : Parser) (msg : String) : Parser where fn := notFollowedByFn p.fn msg -} partial def manyAux (p : ParserFn) : ParserFn := fun c s => Id.run do let iniSz := s.stackSize @@ -945,10 +963,13 @@ def checkWsBeforeFn (errorMsg : String) : ParserFn := fun _ s => let prev := s.stxStack.back if checkTailWs prev then s else s.mkError errorMsg -def checkWsBefore (errorMsg : String := "space before") : Parser := { +/-- The `ws` parser requires that there is some whitespace at this location. +For example, the parser `"foo" ws "+"` parses `foo +` or `foo/- -/+` but not `foo+`. + +This parser has arity 0 - it does not capture anything. -/ +def checkWsBefore (errorMsg : String := "space before") : Parser where info := epsilonInfo fn := checkWsBeforeFn errorMsg -} def checkTailLinebreak (prev : Syntax) : Bool := match prev.getTailInfo with @@ -959,10 +980,13 @@ def checkLinebreakBeforeFn (errorMsg : String) : ParserFn := fun _ s => let prev := s.stxStack.back if checkTailLinebreak prev then s else s.mkError errorMsg -def checkLinebreakBefore (errorMsg : String := "line break") : Parser := { +/-- The `linebreak` parser requires that there is at least one line break at this location. +(The line break may be inside a comment.) + +This parser has arity 0 - it does not capture anything. -/ +def checkLinebreakBefore (errorMsg : String := "line break") : Parser where info := epsilonInfo fn := checkLinebreakBeforeFn errorMsg -} private def pickNonNone (stack : SyntaxStack) : Syntax := match stack.toSubarray.findRev? fun stx => !stx.isNone with @@ -973,6 +997,13 @@ def checkNoWsBeforeFn (errorMsg : String) : ParserFn := fun _ s => let prev := pickNonNone s.stxStack if checkTailNoWs prev then s else s.mkError errorMsg +/-- The `noWs` parser requires that there is *no* whitespace between the preceding and following +parsers. For example, the parser `"foo" noWs "+"` parses `foo+` but not `foo +`. + +This is almost the same as `"foo+"`, but using this parser will make `foo+` a token, which may cause +problems for the use of `"foo"` and `"+"` as separate tokens in other parsers. + +This parser has arity 0 - it does not capture anything. -/ def checkNoWsBefore (errorMsg : String := "no space before") : Parser := { info := epsilonInfo fn := checkNoWsBeforeFn errorMsg @@ -1234,8 +1265,13 @@ def checkColEqFn (errorMsg : String) : ParserFn := fun c s => if pos.column = savedPos.column then s else s.mkError errorMsg -def checkColEq (errorMsg : String := "checkColEq") : Parser := - { fn := checkColEqFn errorMsg } +/-- The `colEq` parser ensures that the next token starts at exactly the column of the saved +position (see `withPosition`). This can be used to do whitespace sensitive syntax like +a `by` block or `do` block, where all the lines have to line up. + +This parser has arity 0 - it does not capture anything. -/ +def checkColEq (errorMsg : String := "checkColEq") : Parser where + fn := checkColEqFn errorMsg def checkColGeFn (errorMsg : String) : ParserFn := fun c s => match c.savedPos? with @@ -1246,8 +1282,15 @@ def checkColGeFn (errorMsg : String) : ParserFn := fun c s => if pos.column ≥ savedPos.column then s else s.mkError errorMsg -def checkColGe (errorMsg : String := "checkColGe") : Parser := - { fn := checkColGeFn errorMsg } +/-- The `colGe` parser requires that the next token starts from at least the column of the saved +position (see `withPosition`), but allows it to be more indented. +This can be used for whitespace sensitive syntax to ensure that a block does not go outside a +certain indentation scope. For example it is used in the lean grammar for `else if`, to ensure +that the `else` is not less indented than the `if` it matches with. + +This parser has arity 0 - it does not capture anything. -/ +def checkColGe (errorMsg : String := "checkColGe") : Parser where + fn := checkColGeFn errorMsg def checkColGtFn (errorMsg : String) : ParserFn := fun c s => match c.savedPos? with @@ -1258,8 +1301,20 @@ def checkColGtFn (errorMsg : String) : ParserFn := fun c s => if pos.column > savedPos.column then s else s.mkError errorMsg -def checkColGt (errorMsg : String := "checkColGt") : Parser := - { fn := checkColGtFn errorMsg } +/-- The `colGt` parser requires that the next token starts a strictly greater column than the saved +position (see `withPosition`). This can be used for whitespace sensitive syntax for the arguments +to a tactic, to ensure that the following tactic is not interpreted as an argument. +``` +example (x : False) : False := by + revert x + exact id +``` +Here, the `revert` tactic is followed by a list of `colGt ident`, because otherwise it would +interpret `exact` as an identifier and try to revert a variable named `exact`. + +This parser has arity 0 - it does not capture anything. -/ +def checkColGt (errorMsg : String := "checkColGt") : Parser where + fn := checkColGtFn errorMsg def checkLineEqFn (errorMsg : String) : ParserFn := fun c s => match c.savedPos? with @@ -1270,9 +1325,28 @@ def checkLineEqFn (errorMsg : String) : ParserFn := fun c s => if pos.line == savedPos.line then s else s.mkError errorMsg -def checkLineEq (errorMsg : String := "checkLineEq") : Parser := - { fn := checkLineEqFn errorMsg } +/-- The `lineEq` parser requires that the current token is on the same line as the saved position +(see `withPosition`). This can be used to ensure that composite tokens are not "broken up" across +different lines. For example, `else if` is parsed using `lineEq` to ensure that the two tokens +are on the same line. +This parser has arity 0 - it does not capture anything. -/ +def checkLineEq (errorMsg : String := "checkLineEq") : Parser where + fn := checkLineEqFn errorMsg + +/-- `withPosition(p)` runs `p` while setting the "saved position" to the current position. +This has no effect on its own, but various other parsers access this position to achieve some +composite effect: + +* `colGt`, `colGe`, `colEq` compare the column of the saved position to the current position, + used to implement Python-style indentation sensitive blocks +* `lineEq` ensures that the current position is still on the same line as the saved position, + used to implement composite tokens + +The saved position is only available in the read-only state, which is why this is a scoping parser: +after the `withPosition(..)` block the saved position will be restored to its original value. + +This parser has the same arity as `p` - it just forwards the results of `p`. -/ def withPosition : Parser → Parser := withFn fun f c s => adaptCacheableContextFn ({ · with savedPos? := s.pos }) f c s @@ -1280,12 +1354,29 @@ def withPositionAfterLinebreak : Parser → Parser := withFn fun f c s => let prev := s.stxStack.back adaptCacheableContextFn (fun c => if checkTailLinebreak prev then { c with savedPos? := s.pos } else c) f c s +/-- `withoutPosition(p)` runs `p` without the saved position, meaning that position-checking +parsers like `colGt` will have no effect. This is usually used by bracketing constructs like +`(...)` so that the user can locally override whitespace sensitivity. + +This parser has the same arity as `p` - it just forwards the results of `p`. -/ def withoutPosition (p : Parser) : Parser := adaptCacheableContext ({ · with savedPos? := none }) p +/-- `withForbidden tk p` runs `p` with `tk` as a "forbidden token". This means that if the token +appears anywhere in `p` (unless it is nested in `withoutForbidden`), parsing will immediately +stop there, making `tk` effectively a lowest-precedence operator. This is used for parsers like +`for x in arr do ...`: `arr` is parsed as `withForbidden "do" term` because otherwise `arr do ...` +would be treated as an application. + +This parser has the same arity as `p` - it just forwards the results of `p`. -/ def withForbidden (tk : Token) (p : Parser) : Parser := adaptCacheableContext ({ · with forbiddenTk? := tk }) p +/-- `withoutForbidden(p)` runs `p` disabling the "forbidden token" (see `withForbidden`), if any. +This is usually used by bracketing constructs like `(...)` because there is no parsing ambiguity +inside these nested constructs. + +This parser has the same arity as `p` - it just forwards the results of `p`. -/ def withoutForbidden (p : Parser) : Parser := adaptCacheableContext ({ · with forbiddenTk? := none }) p diff --git a/src/Lean/Parser/Command.lean b/src/Lean/Parser/Command.lean index 4ff7107243..d770151167 100644 --- a/src/Lean/Parser/Command.lean +++ b/src/Lean/Parser/Command.lean @@ -70,6 +70,20 @@ def «noncomputable» := leading_parser "noncomputable " def «unsafe» := leading_parser "unsafe " def «partial» := leading_parser "partial " def «nonrec» := leading_parser "nonrec " + +/-- `declModifiers` is the collection of modifiers on a declaration: +* a doc comment `/-! ... -/` +* a list of attributes `@[attr1, attr2]` +* a visibility specifier, `private` or `protected` +* `noncomputable` +* `unsafe` +* `partial` or `nonrec` + +All modifiers are optional, and have to come in the listed order. + +`nestedDeclModifiers` is the same as `declModifiers`, but attributes are printed +on the same line as the declaration. It is used for declarations nested inside other syntax, +such as inductive constructors, structure projections, and `let rec` / `where` definitions. -/ def declModifiers (inline : Bool) := leading_parser optional docComment >> optional (Term.«attributes» >> if inline then skip else ppDedent ppLine) >> @@ -77,10 +91,13 @@ def declModifiers (inline : Bool) := leading_parser optional «noncomputable» >> optional «unsafe» >> optional («partial» <|> «nonrec») +/-- `declId` matches `foo` or `foo.{u,v}`: an identifier possibly followed by a list of universe names -/ def declId := leading_parser ident >> optional (".{" >> sepBy1 ident ", " >> "}") +/-- `declSig` matches the signature of a declaration with required type: a list of binders and then `: type` -/ def declSig := leading_parser many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.typeSpec +/-- `optDeclSig` matches the signature of a declaration with optional type: a list of binders and then possibly `: type` -/ def optDeclSig := leading_parser many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.optType def declValSimple := leading_parser @@ -92,12 +109,15 @@ def whereStructField := leading_parser def whereStructInst := leading_parser ppIndent ppSpace >> "where" >> sepByIndent (ppGroup whereStructField) "; " (allowTrailingSep := true) >> optional Term.whereDecls -/- - Remark: we should not use `Term.whereDecls` at `declVal` - because `Term.whereDecls` is defined using `Term.letRecDecl` which may contain attributes. - Issue #753 shows an example that fails to be parsed when we used `Term.whereDecls`. +/-- `declVal` matches the right-hand side of a declaration, one of: +* `:= expr` (a "simple declaration") +* a sequence of `| pat => expr` (a declaration by equations), shorthand for a `match` +* `where` and then a sequence of `field := value` initializers, shorthand for a structure constructor -/ def declVal := + -- Remark: we should not use `Term.whereDecls` at `declVal` + -- because `Term.whereDecls` is defined using `Term.letRecDecl` which may contain attributes. + -- Issue #753 shows an example that fails to be parsed when we used `Term.whereDecls`. withAntiquot (mkAntiquot "declVal" `Lean.Parser.Command.declVal (isPseudoKind := true)) <| declValSimple <|> declValEqns <|> whereStructInst def «abbrev» := leading_parser @@ -245,6 +265,7 @@ def openSimple := leading_parser many1 (ppSpace >> checkColGt >> ident) def openScoped := leading_parser " scoped" >> many1 (ppSpace >> checkColGt >> ident) +/-- `openDecl` is the body of an `open` declaration (see `open`) -/ def openDecl := withAntiquot (mkAntiquot "openDecl" `Lean.Parser.Command.openDecl (isPseudoKind := true)) <| openHiding <|> openRenaming <|> openOnly <|> openSimple <|> openScoped @@ -279,8 +300,10 @@ def initializeKeyword := leading_parser builtin_initialize registerBuiltinNodeKind ``eoi -@[run_builtin_parser_attribute_hooks] abbrev declModifiersF := declModifiers false -@[run_builtin_parser_attribute_hooks] abbrev declModifiersT := declModifiers true +@[run_builtin_parser_attribute_hooks, inherit_doc declModifiers] +abbrev declModifiersF := declModifiers false +@[run_builtin_parser_attribute_hooks, inherit_doc declModifiers] +abbrev declModifiersT := declModifiers true builtin_initialize register_parser_alias (kind := ``declModifiers) "declModifiers" declModifiersF diff --git a/src/Lean/Parser/Do.lean b/src/Lean/Parser/Do.lean index 78a57687bc..10aaf4cc18 100644 --- a/src/Lean/Parser/Do.lean +++ b/src/Lean/Parser/Do.lean @@ -25,9 +25,17 @@ def doSeqIndent := leading_parser many1Indent doSeqItem def doSeqBracketed := leading_parser "{" >> withoutPosition (many1 doSeqItem) >> ppLine >> "}" +/-- A `doSeq` is a sequence of `doElem`, the main argument after the `do` keyword and other +do elements that take blocks. It can either have the form `"{" (doElem ";"?)* "}"` or +`many1Indent (doElem ";"?)`, where `many1Indent` ensures that all the items are at +the same or higher indentation level as the first line. -/ def doSeq := withAntiquot (mkAntiquot "doSeq" decl_name% (isPseudoKind := true)) <| doSeqBracketed <|> doSeqIndent +/-- `termBeforeDo` is defined as `withForbidden("do", term)`, which will parse a term but +disallows `do` outside of a bracketing construct. This is used for parsers like `for _ in t do ...` +or `unless t do ...`, where we do not want `t do ...` to be parsed as an application of `t` to a +`do` block, which would otherwise be allowed. -/ def termBeforeDo := withForbidden "do" termParser attribute [run_builtin_parser_attribute_hooks] doSeq termBeforeDo diff --git a/src/Lean/Parser/Extra.lean b/src/Lean/Parser/Extra.lean index a00944f6f6..abeb359c36 100644 --- a/src/Lean/Parser/Extra.lean +++ b/src/Lean/Parser/Extra.lean @@ -21,15 +21,54 @@ attribute [run_builtin_parser_attribute_hooks] withOpen withOpenDecl dbgTraceState +/-- The parser `optional(p)`, or `(p)?`, parses `p` if it succeeds, +otherwise it succeeds with no value. + +Note that because `?` is a legal identifier character, one must write `(p)?` or `p ?` for +it to parse correctly. `ident?` will not work; one must write `(ident)?` instead. + +This parser has arity 1: it produces a `nullKind` node containing either zero arguments +(for the `none` case) or the list of arguments produced by `p`. +(In particular, if `p` has arity 0 then the two cases are not differentiated!) -/ @[run_builtin_parser_attribute_hooks] def optional (p : Parser) : Parser := optionalNoAntiquot (withAntiquotSpliceAndSuffix `optional p (symbol "?")) +/-- The parser `many(p)`, or `p*`, repeats `p` until it fails, and returns the list of results. + +The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be +automatically replaced by `group(p)` to ensure that it produces exactly 1 value. + +This parser has arity 1: it produces a `nullKind` node containing one argument for each +invocation of `p` (or `group(p)`). -/ @[run_builtin_parser_attribute_hooks] def many (p : Parser) : Parser := manyNoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*")) +/-- The parser `many1(p)`, or `p+`, repeats `p` until it fails, and returns the list of results. +`p` must succeed at least once, or this parser will fail. + +Note that this parser produces the same parse tree as the `many(p)` / `p*` combinator, +and one matches both `p*` and `p+` using `$[ .. ]*` syntax in a syntax match. +(There is no `$[ .. ]+` syntax.) + +The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be +automatically replaced by `group(p)` to ensure that it produces exactly 1 value. + +This parser has arity 1: it produces a `nullKind` node containing one argument for each +invocation of `p` (or `group(p)`). -/ @[run_builtin_parser_attribute_hooks] def many1 (p : Parser) : Parser := many1NoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*")) +/-- The parser `ident` parses a single identifier, possibly with namespaces, such as `foo` or +`bar.baz`. The identifier must not be a declared token, so for example it will not match `"def"` +because `def` is a keyword token. Tokens are implicitly declared by using them in string literals +in parser declarations, so `syntax foo := "bla"` will make `bla` no longer legal as an identifier. + +Identifiers can contain special characters or keywords if they are escaped using the `«»` characters: +`«def»` is an identifier named `def`, and `«x»` is treated the same as `x`. This is useful for +using disallowed characters in identifiers such as `«foo.bar».baz` or `«hello world»`. + +This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier. +You can use `TSyntax.getId` to extract the name from the resulting syntax object. -/ @[run_builtin_parser_attribute_hooks] def ident : Parser := withAntiquot (mkAntiquot "ident" identKind) identNoAntiquot @@ -37,30 +76,96 @@ attribute [run_builtin_parser_attribute_hooks] @[run_builtin_parser_attribute_hooks] def rawIdent : Parser := withAntiquot (mkAntiquot "ident" identKind) rawIdentNoAntiquot +/-- The parser `hygieneInfo` parses no text, but captures the current macro scope information +as though it parsed an identifier at the current position. It returns a `hygieneInfoKind` node +around an `.ident` which is `Name.anonymous` but with macro scopes like a regular identifier. + +This is used to implement `have := ...` syntax: the `hygieneInfo` between the `have` and `:=` +substitutes for the identifier which would normally go there as in `have x :=`, so that we +can expand `have :=` to `have this :=` while retaining the usual macro name resolution behavior. +See [doc/macro_overview.md](https://github.com/leanprover/lean4/blob/master/doc/macro_overview.md) +for more information about macro hygiene. + +This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier. +You can use `TSyntax.getHygieneInfo` to extract the name from the resulting syntax object. -/ @[run_builtin_parser_attribute_hooks] def hygieneInfo : Parser := withAntiquot (mkAntiquot "hygieneInfo" hygieneInfoKind (anonymous := false)) hygieneInfoNoAntiquot +/-- The parser `num` parses a numeric literal in several bases: + +* Decimal: `129` +* Hexadecimal: `0xdeadbeef` +* Octal: `0o755` +* Binary: `0b1101` + +This parser has arity 1: it produces a `numLitKind` node containing an atom with the text of the +literal. +You can use `TSyntax.getNat` to extract the number from the resulting syntax object. -/ @[run_builtin_parser_attribute_hooks] def numLit : Parser := withAntiquot (mkAntiquot "num" numLitKind) numLitNoAntiquot +/-- The parser `scientific` parses a scientific-notation literal, such as `1.3e-24`. + +This parser has arity 1: it produces a `scientificLitKind` node containing an atom with the text +of the literal. +You can use `TSyntax.getScientific` to extract the parts from the resulting syntax object. -/ @[run_builtin_parser_attribute_hooks] def scientificLit : Parser := withAntiquot (mkAntiquot "scientific" scientificLitKind) scientificLitNoAntiquot +/-- The parser `str` parses a string literal, such as `"foo"` or `"\r\n"`. Strings can contain +C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode characters like `∈`. +Newlines in a string are interpreted literally. + +This parser has arity 1: it produces a `strLitKind` node containing an atom with the raw +literal (including the quote marks and without interpreting the escapes). +You can use `TSyntax.getString` to decode the string from the resulting syntax object. -/ @[run_builtin_parser_attribute_hooks] def strLit : Parser := withAntiquot (mkAntiquot "str" strLitKind) strLitNoAntiquot +/-- The parser `char` parses a character literal, such as `'a'` or `'\n'`. Character literals can +contain C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode characters +like `∈`, but must evaluate to a single unicode codepoint, so `'♥'` is allowed but `'❤️'` is not +(since it is two codepoints but one grapheme cluster). + +This parser has arity 1: it produces a `charLitKind` node containing an atom with the raw +literal (including the quote marks and without interpreting the escapes). +You can use `TSyntax.getChar` to decode the string from the resulting syntax object. -/ @[run_builtin_parser_attribute_hooks] def charLit : Parser := withAntiquot (mkAntiquot "char" charLitKind) charLitNoAntiquot +/-- The parser `name` parses a name literal like `` `foo``. The syntax is the same as for identifiers +(see `ident`) but with a leading backquote. + +This parser has arity 1: it produces a `nameLitKind` node containing the raw literal +(including the backquote). +You can use `TSyntax.getName` to extract the name from the resulting syntax object. -/ @[run_builtin_parser_attribute_hooks] def nameLit : Parser := withAntiquot (mkAntiquot "name" nameLitKind) nameLitNoAntiquot +/-- The parser `group(p)` parses the same thing as `p`, but it wraps the results in a `groupKind` +node. + +This parser always has arity 1, even if `p` does not. Parsers like `p*` are automatically +rewritten to `group(p)*` if `p` does not have arity 1, so that the results from separate invocations +of `p` can be differentiated. -/ @[run_builtin_parser_attribute_hooks, inline] def group (p : Parser) : Parser := node groupKind p +/-- The parser `many1Indent(p)` is equivalent to `withPosition((colGe p)+)`. This has the effect of +parsing one or more occurrences of `p`, where each subsequent `p` parse needs to be indented +the same or more than the first parse. + +This parser has arity 1, and returns a list of the results from `p`. +`p` is "auto-grouped" if it is not arity 1. -/ @[run_builtin_parser_attribute_hooks, inline] def many1Indent (p : Parser) : Parser := withPosition $ many1 (checkColGe "irrelevant" >> p) +/-- The parser `manyIndent(p)` is equivalent to `withPosition((colGe p)*)`. This has the effect of +parsing zero or more occurrences of `p`, where each subsequent `p` parse needs to be indented +the same or more than the first parse. + +This parser has arity 1, and returns a list of the results from `p`. +`p` is "auto-grouped" if it is not arity 1. -/ @[run_builtin_parser_attribute_hooks, inline] def manyIndent (p : Parser) : Parser := withPosition $ many (checkColGe "irrelevant" >> p) diff --git a/src/Lean/Parser/StrInterpolation.lean b/src/Lean/Parser/StrInterpolation.lean index ad0571e6d9..2a8ac5815a 100644 --- a/src/Lean/Parser/StrInterpolation.lean +++ b/src/Lean/Parser/StrInterpolation.lean @@ -58,6 +58,17 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s => info := mkAtomicInfo "interpolatedStr" } +/-- The parser `interpolatedStr(p)` parses a string literal like `"foo"` (see `str`), but the string +may also contain `{}` escapes, and within the escapes the parser `p` is used. For example, +`interpolatedStr(term)` will parse `"foo {2 + 2}"`, where `2 + 2` is parsed as a term rather than +as a string. Note that the full Lean term grammar is available here, including string literals, +so for example `"foo {"bar" ++ "baz"}"` is a legal interpolated string (which evaluates to +`foo barbaz`). + +This parser has arity 1, and returns a `interpolatedStrKind` with an odd number of arguments, +alternating between chunks of literal text and results from `p`. The literal chunks contain +uninterpreted substrings of the input. For example, `"foo\n{2 + 2}"` would have three arguments: +an atom `"foo\n{`, the parsed `2 + 2` term, and then the atom `}"`. -/ def interpolatedStr (p : Parser) : Parser := withAntiquot (mkAntiquot "interpolatedStr" interpolatedStrKind) $ interpolatedStrNoAntiquot p diff --git a/src/Lean/Parser/Term.lean b/src/Lean/Parser/Term.lean index 47d0853c0e..7c5c28e443 100644 --- a/src/Lean/Parser/Term.lean +++ b/src/Lean/Parser/Term.lean @@ -18,6 +18,11 @@ def commentBody.parenthesizer := PrettyPrinter.Parenthesizer.visitToken @[combinator_formatter commentBody] def commentBody.formatter := PrettyPrinter.Formatter.visitAtom Name.anonymous +/-- A `docComment` parses a "documentation comment" like `/-- foo -/`. This is not treated like +a regular comment (that is, as whitespace); it is parsed and forms part of the syntax tree structure. + +A `docComment` node contains a `/--` atom and then the remainder of the comment, `foo -/` in this +example. Use `TSyntax.getDocString` to extract the body text from a doc string syntax node. -/ def docComment := leading_parser ppDedent $ "/--" >> ppSpace >> commentBody >> ppLine end Command @@ -34,10 +39,28 @@ builtin_initialize namespace Tactic +/-- `sepByIndentSemicolon(p)` parses a sequence of `p` optionally followed by `;`, +similar to `manyIndent(p ";"?)`, except that if two occurrences of `p` occur on the same line, +the `;` is mandatory. This is used by tactic parsing, so that +``` +example := by + skip + skip +``` +is legal, but `by skip skip` is not - it must be written as `by skip; skip`. -/ @[run_builtin_parser_attribute_hooks] def sepByIndentSemicolon (p : Parser) : Parser := sepByIndent p "; " (allowTrailingSep := true) +/-- `sepBy1IndentSemicolon(p)` parses a (nonempty) sequence of `p` optionally followed by `;`, +similar to `many1Indent(p ";"?)`, except that if two occurrences of `p` occur on the same line, +the `;` is mandatory. This is used by tactic parsing, so that +``` +example := by + skip + skip +``` +is legal, but `by skip skip` is not - it must be written as `by skip; skip`. -/ @[run_builtin_parser_attribute_hooks] def sepBy1IndentSemicolon (p : Parser) : Parser := sepBy1Indent p "; " (allowTrailingSep := true) @@ -117,6 +140,8 @@ def optSemicolon (p : Parser) : Parser := /-- A placeholder term, to be synthesized by unification. -/ @[builtin_term_parser] def hole := leading_parser "_" +/-- Parses a "synthetic hole", that is, `?foo` or `?_`. +This syntax is used to construct named metavariables. -/ @[builtin_term_parser] def syntheticHole := leading_parser "?" >> (ident <|> hole) def binderIdent : Parser := ident <|> hole @@ -165,6 +190,8 @@ def optIdent : Parser := def fromTerm := leading_parser "from " >> termParser def showRhs := fromTerm <|> byTactic' +/-- A `sufficesDecl` represents everything that comes after the `suffices` keyword: +an optional `x :`, then a term `ty`, then `from val` or `by tac`. -/ def sufficesDecl := leading_parser (atomic (group (binderIdent >> " : ")) <|> hygieneInfo) >> termParser >> ppSpace >> showRhs @[builtin_term_parser] def «suffices» := leading_parser:leadPrec @@ -251,6 +278,12 @@ and solved by typeclass inference of the specified class. -/ def instBinder := ppGroup <| leading_parser "[" >> withoutPosition (optIdent >> termParser) >> "]" +/-- A `bracketedBinder` matches any kind of binder group that uses some kind of brackets: +* An explicit binder like `(x y : A)` +* An implicit binder like `{x y : A}` +* A strict implicit binder, `⦃y z : A⦄` or its ASCII alternative `{{y z : A}}` +* An instance binder `[A]` or `[x : A]` (multiple variables are not allowed here) +-/ def bracketedBinder (requireType := false) := withAntiquot (mkAntiquot "bracketedBinder" decl_name% (isPseudoKind := true)) <| explicitBinder requireType <|> strictImplicitBinder requireType <|> @@ -297,6 +330,8 @@ instance : Coe (TSyntax ``matchAltExpr) (TSyntax ``matchAlt) where def matchAlts (rhsParser : Parser := termParser) : Parser := leading_parser withPosition $ many1Indent (ppLine >> matchAlt rhsParser) +/-- `matchDiscr` matches a "match discriminant", either `h : tm` or `tm`, used in `match` as +`match h1 : e1, e2, h3 : e3 with ...`. -/ def matchDiscr := leading_parser optional (atomic (ident >> " : ")) >> termParser @@ -419,9 +454,13 @@ def letPatDecl := leading_parser (withAnonymousAntiquot := false) -/ def letEqnsDecl := leading_parser (withAnonymousAntiquot := false) letIdLhs >> (" := " <|> matchAlts) --- Remark: we disable anonymous antiquotations here to make sure --- anonymous antiquotations (e.g., `$x`) are not `letDecl` +/-- `letDecl` matches the body of a let declaration `let f x1 x2 := e`, +`let pat := e` (where `pat` is an arbitrary term) or `let f | pat1 => e1 | pat2 => e2 ...` +(a pattern matching declaration), except for the `let` keyword itself. +`let rec` declarations are not handled here. -/ def letDecl := leading_parser (withAnonymousAntiquot := false) + -- Remark: we disable anonymous antiquotations here to make sure + -- anonymous antiquotations (e.g., `$x`) are not `letDecl` notFollowedBy (nonReservedSymbol "rec") "rec" >> (letIdDecl <|> letPatDecl <|> letEqnsDecl) /-- @@ -472,6 +511,9 @@ def haveIdDecl := leading_parser (withAnonymousAntiquot := false) atomic (haveIdLhs >> " := ") >> termParser def haveEqnsDecl := leading_parser (withAnonymousAntiquot := false) haveIdLhs >> matchAlts +/-- `haveDecl` matches the body of a have declaration: `have := e`, `have f x1 x2 := e`, +`have pat := e` (where `pat` is an arbitrary term) or `have f | pat1 => e1 | pat2 => e2 ...` +(a pattern matching declaration), except for the `have` keyword itself. -/ def haveDecl := leading_parser (withAnonymousAntiquot := false) haveIdDecl <|> (ppSpace >> letPatDecl) <|> haveEqnsDecl @[builtin_term_parser] def «have» := leading_parser:leadPrec @@ -479,13 +521,17 @@ def haveDecl := leading_parser (withAnonymousAntiquot := false) def «scoped» := leading_parser "scoped " def «local» := leading_parser "local " +/-- `attrKind` matches `("scoped" <|> "local")?`, used before an attribute like `@[local simp]`. -/ def attrKind := leading_parser optional («scoped» <|> «local») def attrInstance := ppGroup $ leading_parser attrKind >> attrParser def attributes := leading_parser "@[" >> withoutPosition (sepBy1 attrInstance ", ") >> "] " +/-- `letRecDecl` matches the body of a let-rec declaration: a doc comment, attributes, and then +a let declaration without the `let` keyword, such as `/-- foo -/ @[simp] bar := 1`. -/ def letRecDecl := leading_parser optional Command.docComment >> optional «attributes» >> letDecl +/-- `letRecDecls` matches `letRecDecl,+`, a comma-separated list of let-rec declarations (see `letRecDecl`). -/ def letRecDecls := leading_parser sepBy1 letRecDecl ", " @[builtin_term_parser] diff --git a/tests/lean/interactive/hover.lean.expected.out b/tests/lean/interactive/hover.lean.expected.out index 4585e46fd0..de0490371a 100644 --- a/tests/lean/interactive/hover.lean.expected.out +++ b/tests/lean/interactive/hover.lean.expected.out @@ -144,7 +144,7 @@ {"start": {"line": 87, "character": 14}, "end": {"line": 87, "character": 36}}, "contents": {"value": - "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\n*import Lean.Parser.Do*", + "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\nA `doSeq` is a sequence of `doElem`, the main argument after the `do` keyword and other\ndo elements that take blocks. It can either have the form `\"{\" (doElem \";\"?)* \"}\"` or\n`many1Indent (doElem \";\"?)`, where `many1Indent` ensures that all the items are at\nthe same or higher indentation level as the first line. \n***\n*import Lean.Parser.Do*", "kind": "markdown"}} {"textDocument": {"uri": "file://hover.lean"}, "position": {"line": 89, "character": 30}} @@ -152,7 +152,7 @@ {"start": {"line": 89, "character": 29}, "end": {"line": 89, "character": 34}}, "contents": {"value": - "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\n*import Lean.Parser.Do*", + "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\nA `doSeq` is a sequence of `doElem`, the main argument after the `do` keyword and other\ndo elements that take blocks. It can either have the form `\"{\" (doElem \";\"?)* \"}\"` or\n`many1Indent (doElem \";\"?)`, where `many1Indent` ensures that all the items are at\nthe same or higher indentation level as the first line. \n***\n*import Lean.Parser.Do*", "kind": "markdown"}} {"textDocument": {"uri": "file://hover.lean"}, "position": {"line": 92, "character": 2}} @@ -370,7 +370,12 @@ "contents": {"value": "```lean\nn : ℕ\n```", "kind": "markdown"}} {"textDocument": {"uri": "file://hover.lean"}, "position": {"line": 192, "character": 2}} -null +{"range": + {"start": {"line": 192, "character": 0}, "end": {"line": 192, "character": 9}}, + "contents": + {"value": + "`declModifiers` is the collection of modifiers on a declaration:\n* a doc comment `/-! ... -/`\n* a list of attributes `@[attr1, attr2]`\n* a visibility specifier, `private` or `protected`\n* `noncomputable`\n* `unsafe`\n* `partial` or `nonrec`\n\nAll modifiers are optional, and have to come in the listed order.\n\n`nestedDeclModifiers` is the same as `declModifiers`, but attributes are printed\non the same line as the declaration. It is used for declarations nested inside other syntax,\nsuch as inductive constructors, structure projections, and `let rec` / `where` definitions. ", + "kind": "markdown"}} {"textDocument": {"uri": "file://hover.lean"}, "position": {"line": 198, "character": 2}} {"range":