doc: document all parser aliases (#2499)

2023-09-06 11:02:25 +02:00 · 2023-09-06 11:02:25 +02:00 · 2037094f8c
commit 2037094f8c
parent 84bf315ac8
7 changed files with 319 additions and 30 deletions
--- a/src/Lean/Parser/Basic.lean
+++ b/src/Lean/Parser/Basic.lean
@ -92,10 +92,15 @@ def andthenFn (p q : ParserFn) : ParserFn := fun c s =>
  firstTokens   := p.firstTokens.seq q.firstTokens
 }

-def andthen (p q : Parser) : Parser := {
-  info := andthenInfo p.info q.info,
+/-- The `andthen(p, q)` combinator, usually written as adjacency in syntax declarations (`p q`),
+parses `p` followed by `q`.
+
+The arity of this parser is the sum of the arities of `p` and `q`:
+that is, it accumulates all the nodes produced by `p` followed by the nodes from `q` into the list
+of arguments to the surrounding parse node. -/
+def andthen (p q : Parser) : Parser where
+  info := andthenInfo p.info q.info
  fn   := andthenFn p.fn q.fn
-}

 instance : AndThen Parser where
  andThen a b := andthen a (b ())
@ -265,10 +270,9 @@ def orelseFn (p q : ParserFn) : ParserFn :=
  NOTE: In order for the pretty printer to retrace an `orelse`, `p` must be a call to `node` or some other parser
  producing a single node kind. Nested `orelse` calls are flattened for this, i.e. `(node k1 p1 <|> node k2 p2) <|> ...`
  is fine as well. -/
-def orelse (p q : Parser) : Parser := {
+def orelse (p q : Parser) : Parser where
  info := orelseInfo p.info q.info
  fn   := orelseFn p.fn q.fn
-}

 instance : OrElse Parser where
  orElse a b := orelse a (b ())
@ -284,6 +288,12 @@ def atomicFn (p : ParserFn) : ParserFn := fun c s =>
  | ⟨stack, lhsPrec, _, cache, some msg⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg⟩
  | other                       => other

+/-- The `atomic(p)` parser parses `p`, returns the same result as `p` and fails iff `p` fails,
+but if `p` fails after consuming some tokens `atomic(p)` will fail without consuming tokens.
+This is important for the `p <|> q` combinator, because it is not backtracking, and will fail if
+`p` fails after consuming some tokens. To get backtracking behavior, use `atomic(p) <|> q` instead.
+
+This parser has the same arity as `p` - it produces the same result as `p`. -/
 def atomic : Parser → Parser := withFn atomicFn

 def optionalFn (p : ParserFn) : ParserFn := fun c s =>
@ -310,6 +320,11 @@ def lookaheadFn (p : ParserFn) : ParserFn := fun c s =>
  let s      := p c s
  if s.hasError then s else s.restore iniSz iniPos

+/-- `lookahead(p)` runs `p` and fails if `p` does, but it produces no parse nodes and rewinds the
+position to the original state on success. So for example `lookahead("=>")` will ensure that the
+next token is `"=>"`, without actually consuming this token.
+
+This parser has arity 0 - it does not capture anything. -/
 def lookahead : Parser → Parser := withFn lookaheadFn

 def notFollowedByFn (p : ParserFn) (msg : String) : ParserFn := fun c s =>
@ -322,9 +337,12 @@ def notFollowedByFn (p : ParserFn) (msg : String) : ParserFn := fun c s =>
    let s := s.restore iniSz iniPos
    s.mkUnexpectedError s!"unexpected {msg}"

-def notFollowedBy (p : Parser) (msg : String) : Parser := {
+/-- `notFollowedBy(p, "foo")` succeeds iff `p` fails;
+if `p` succeeds then it fails with the message `"unexpected foo"`.
+
+This parser has arity 0 - it does not capture anything. -/
+def notFollowedBy (p : Parser) (msg : String) : Parser where
  fn := notFollowedByFn p.fn msg
-}

 partial def manyAux (p : ParserFn) : ParserFn := fun c s => Id.run do
  let iniSz  := s.stackSize
@ -945,10 +963,13 @@ def checkWsBeforeFn (errorMsg : String) : ParserFn := fun _ s =>
  let prev := s.stxStack.back
  if checkTailWs prev then s else s.mkError errorMsg

-def checkWsBefore (errorMsg : String := "space before") : Parser := {
+/-- The `ws` parser requires that there is some whitespace at this location.
+For example, the parser `"foo" ws "+"` parses `foo +` or `foo/- -/+` but not `foo+`.
+
+This parser has arity 0 - it does not capture anything. -/
+def checkWsBefore (errorMsg : String := "space before") : Parser where
  info := epsilonInfo
  fn   := checkWsBeforeFn errorMsg
-}

 def checkTailLinebreak (prev : Syntax) : Bool :=
  match prev.getTailInfo with
@ -959,10 +980,13 @@ def checkLinebreakBeforeFn (errorMsg : String) : ParserFn := fun _ s =>
  let prev := s.stxStack.back
  if checkTailLinebreak prev then s else s.mkError errorMsg

-def checkLinebreakBefore (errorMsg : String := "line break") : Parser := {
+/-- The `linebreak` parser requires that there is at least one line break at this location.
+(The line break may be inside a comment.)
+
+This parser has arity 0 - it does not capture anything. -/
+def checkLinebreakBefore (errorMsg : String := "line break") : Parser where
  info := epsilonInfo
  fn   := checkLinebreakBeforeFn errorMsg
-}

 private def pickNonNone (stack : SyntaxStack) : Syntax :=
  match stack.toSubarray.findRev? fun stx => !stx.isNone with
@ -973,6 +997,13 @@ def checkNoWsBeforeFn (errorMsg : String) : ParserFn := fun _ s =>
  let prev := pickNonNone s.stxStack
  if checkTailNoWs prev then s else s.mkError errorMsg

+/-- The `noWs` parser requires that there is *no* whitespace between the preceding and following
+parsers. For example, the parser `"foo" noWs "+"` parses `foo+` but not `foo +`.
+
+This is almost the same as `"foo+"`, but using this parser will make `foo+` a token, which may cause
+problems for the use of `"foo"` and `"+"` as separate tokens in other parsers.
+
+This parser has arity 0 - it does not capture anything. -/
 def checkNoWsBefore (errorMsg : String := "no space before") : Parser := {
  info := epsilonInfo
  fn   := checkNoWsBeforeFn errorMsg
@ -1234,8 +1265,13 @@ def checkColEqFn (errorMsg : String) : ParserFn := fun c s =>
    if pos.column = savedPos.column then s
    else s.mkError errorMsg

-def checkColEq (errorMsg : String := "checkColEq") : Parser :=
-  { fn := checkColEqFn errorMsg }
+/-- The `colEq` parser ensures that the next token starts at exactly the column of the saved
+position (see `withPosition`). This can be used to do whitespace sensitive syntax like
+a `by` block or `do` block, where all the lines have to line up.
+
+This parser has arity 0 - it does not capture anything. -/
+def checkColEq (errorMsg : String := "checkColEq") : Parser where
+  fn := checkColEqFn errorMsg

 def checkColGeFn (errorMsg : String) : ParserFn := fun c s =>
  match c.savedPos? with
@ -1246,8 +1282,15 @@ def checkColGeFn (errorMsg : String) : ParserFn := fun c s =>
    if pos.column ≥ savedPos.column then s
    else s.mkError errorMsg

-def checkColGe (errorMsg : String := "checkColGe") : Parser :=
-  { fn := checkColGeFn errorMsg }
+/-- The `colGe` parser requires that the next token starts from at least the column of the saved
+position (see `withPosition`), but allows it to be more indented.
+This can be used for whitespace sensitive syntax to ensure that a block does not go outside a
+certain indentation scope. For example it is used in the lean grammar for `else if`, to ensure
+that the `else` is not less indented than the `if` it matches with.
+
+This parser has arity 0 - it does not capture anything. -/
+def checkColGe (errorMsg : String := "checkColGe") : Parser where
+  fn := checkColGeFn errorMsg

 def checkColGtFn (errorMsg : String) : ParserFn := fun c s =>
  match c.savedPos? with
@ -1258,8 +1301,20 @@ def checkColGtFn (errorMsg : String) : ParserFn := fun c s =>
    if pos.column > savedPos.column then s
    else s.mkError errorMsg

-def checkColGt (errorMsg : String := "checkColGt") : Parser :=
-  { fn := checkColGtFn errorMsg }
+/-- The `colGt` parser requires that the next token starts a strictly greater column than the saved
+position (see `withPosition`). This can be used for whitespace sensitive syntax for the arguments
+to a tactic, to ensure that the following tactic is not interpreted as an argument.
+```
+example (x : False) : False := by
+  revert x
+  exact id
+```
+Here, the `revert` tactic is followed by a list of `colGt ident`, because otherwise it would
+interpret `exact` as an identifier and try to revert a variable named `exact`.
+
+This parser has arity 0 - it does not capture anything. -/
+def checkColGt (errorMsg : String := "checkColGt") : Parser where
+  fn := checkColGtFn errorMsg

 def checkLineEqFn (errorMsg : String) : ParserFn := fun c s =>
  match c.savedPos? with
@ -1270,9 +1325,28 @@ def checkLineEqFn (errorMsg : String) : ParserFn := fun c s =>
    if pos.line == savedPos.line then s
    else s.mkError errorMsg

-def checkLineEq (errorMsg : String := "checkLineEq") : Parser :=
-  { fn := checkLineEqFn errorMsg }
+/-- The `lineEq` parser requires that the current token is on the same line as the saved position
+(see `withPosition`). This can be used to ensure that composite tokens are not "broken up" across
+different lines. For example, `else if` is parsed using `lineEq` to ensure that the two tokens
+are on the same line.

+This parser has arity 0 - it does not capture anything. -/
+def checkLineEq (errorMsg : String := "checkLineEq") : Parser where
+  fn := checkLineEqFn errorMsg
+
+/-- `withPosition(p)` runs `p` while setting the "saved position" to the current position.
+This has no effect on its own, but various other parsers access this position to achieve some
+composite effect:
+
+* `colGt`, `colGe`, `colEq` compare the column of the saved position to the current position,
+  used to implement Python-style indentation sensitive blocks
+* `lineEq` ensures that the current position is still on the same line as the saved position,
+  used to implement composite tokens
+
+The saved position is only available in the read-only state, which is why this is a scoping parser:
+after the `withPosition(..)` block the saved position will be restored to its original value.
+
+This parser has the same arity as `p` - it just forwards the results of `p`. -/
 def withPosition : Parser → Parser := withFn fun f c s =>
    adaptCacheableContextFn ({ · with savedPos? := s.pos }) f c s

@ -1280,12 +1354,29 @@ def withPositionAfterLinebreak : Parser → Parser := withFn fun f c s =>
  let prev := s.stxStack.back
  adaptCacheableContextFn (fun c => if checkTailLinebreak prev then { c with savedPos? := s.pos } else c) f c s

+/-- `withoutPosition(p)` runs `p` without the saved position, meaning that position-checking
+parsers like `colGt` will have no effect. This is usually used by bracketing constructs like
+`(...)` so that the user can locally override whitespace sensitivity.
+
+This parser has the same arity as `p` - it just forwards the results of `p`. -/
 def withoutPosition (p : Parser) : Parser :=
  adaptCacheableContext ({ · with savedPos? := none }) p

+/-- `withForbidden tk p` runs `p` with `tk` as a "forbidden token". This means that if the token
+appears anywhere in `p` (unless it is nested in `withoutForbidden`), parsing will immediately
+stop there, making `tk` effectively a lowest-precedence operator. This is used for parsers like
+`for x in arr do ...`: `arr` is parsed as `withForbidden "do" term` because otherwise `arr do ...`
+would be treated as an application.
+
+This parser has the same arity as `p` - it just forwards the results of `p`. -/
 def withForbidden (tk : Token) (p : Parser) : Parser :=
  adaptCacheableContext ({ · with forbiddenTk? := tk }) p

+/-- `withoutForbidden(p)` runs `p` disabling the "forbidden token" (see `withForbidden`), if any.
+This is usually used by bracketing constructs like `(...)` because there is no parsing ambiguity
+inside these nested constructs.
+
+This parser has the same arity as `p` - it just forwards the results of `p`. -/
 def withoutForbidden (p : Parser) : Parser :=
  adaptCacheableContext ({ · with forbiddenTk? := none }) p

--- a/src/Lean/Parser/Command.lean
+++ b/src/Lean/Parser/Command.lean
@ -70,6 +70,20 @@ def «noncomputable»  := leading_parser "noncomputable "
 def «unsafe»         := leading_parser "unsafe "
 def «partial»        := leading_parser "partial "
 def «nonrec»         := leading_parser "nonrec "
+
+/-- `declModifiers` is the collection of modifiers on a declaration:
+* a doc comment `/-! ... -/`
+* a list of attributes `@[attr1, attr2]`
+* a visibility specifier, `private` or `protected`
+* `noncomputable`
+* `unsafe`
+* `partial` or `nonrec`
+
+All modifiers are optional, and have to come in the listed order.
+
+`nestedDeclModifiers` is the same as `declModifiers`, but attributes are printed
+on the same line as the declaration. It is used for declarations nested inside other syntax,
+such as inductive constructors, structure projections, and `let rec` / `where` definitions. -/
 def declModifiers (inline : Bool) := leading_parser
  optional docComment >>
  optional (Term.«attributes» >> if inline then skip else ppDedent ppLine) >>
@ -77,10 +91,13 @@ def declModifiers (inline : Bool) := leading_parser
  optional «noncomputable» >>
  optional «unsafe» >>
  optional («partial» <|> «nonrec»)
+/-- `declId` matches `foo` or `foo.{u,v}`: an identifier possibly followed by a list of universe names -/
 def declId           := leading_parser
  ident >> optional (".{" >> sepBy1 ident ", " >> "}")
+/-- `declSig` matches the signature of a declaration with required type: a list of binders and then `: type` -/
 def declSig          := leading_parser
  many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.typeSpec
+/-- `optDeclSig` matches the signature of a declaration with optional type: a list of binders and then possibly `: type` -/
 def optDeclSig       := leading_parser
  many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.optType
 def declValSimple    := leading_parser
@ -92,12 +109,15 @@ def whereStructField := leading_parser
 def whereStructInst  := leading_parser
  ppIndent ppSpace >> "where" >> sepByIndent (ppGroup whereStructField) "; " (allowTrailingSep := true) >>
  optional Term.whereDecls
-/-
-  Remark: we should not use `Term.whereDecls` at `declVal`
-  because `Term.whereDecls` is defined using `Term.letRecDecl` which may contain attributes.
-  Issue #753 shows an example that fails to be parsed when we used `Term.whereDecls`.
+/-- `declVal` matches the right-hand side of a declaration, one of:
+* `:= expr` (a "simple declaration")
+* a sequence of `| pat => expr` (a declaration by equations), shorthand for a `match`
+* `where` and then a sequence of `field := value` initializers, shorthand for a structure constructor
 -/
 def declVal          :=
+  -- Remark: we should not use `Term.whereDecls` at `declVal`
+  -- because `Term.whereDecls` is defined using `Term.letRecDecl` which may contain attributes.
+  -- Issue #753 shows an example that fails to be parsed when we used `Term.whereDecls`.
  withAntiquot (mkAntiquot "declVal" `Lean.Parser.Command.declVal (isPseudoKind := true)) <|
    declValSimple <|> declValEqns <|> whereStructInst
 def «abbrev»         := leading_parser
@ -245,6 +265,7 @@ def openSimple       := leading_parser
  many1 (ppSpace >> checkColGt >> ident)
 def openScoped       := leading_parser
  " scoped" >> many1 (ppSpace >> checkColGt >> ident)
+/-- `openDecl` is the body of an `open` declaration (see `open`) -/
 def openDecl         :=
  withAntiquot (mkAntiquot "openDecl" `Lean.Parser.Command.openDecl (isPseudoKind := true)) <|
    openHiding <|> openRenaming <|> openOnly <|> openSimple <|> openScoped
@ -279,8 +300,10 @@ def initializeKeyword := leading_parser
 builtin_initialize
  registerBuiltinNodeKind ``eoi

-@[run_builtin_parser_attribute_hooks] abbrev declModifiersF := declModifiers false
-@[run_builtin_parser_attribute_hooks] abbrev declModifiersT := declModifiers true
+@[run_builtin_parser_attribute_hooks, inherit_doc declModifiers]
+abbrev declModifiersF := declModifiers false
+@[run_builtin_parser_attribute_hooks, inherit_doc declModifiers]
+abbrev declModifiersT := declModifiers true

 builtin_initialize
  register_parser_alias (kind := ``declModifiers) "declModifiers"       declModifiersF
--- a/src/Lean/Parser/Do.lean
+++ b/src/Lean/Parser/Do.lean
@ -25,9 +25,17 @@ def doSeqIndent    := leading_parser
  many1Indent doSeqItem
 def doSeqBracketed := leading_parser
  "{" >> withoutPosition (many1 doSeqItem) >> ppLine >> "}"
+/-- A `doSeq` is a sequence of `doElem`, the main argument after the `do` keyword and other
+do elements that take blocks. It can either have the form `"{" (doElem ";"?)* "}"` or
+`many1Indent (doElem ";"?)`, where `many1Indent` ensures that all the items are at
+the same or higher indentation level as the first line. -/
 def doSeq          :=
  withAntiquot (mkAntiquot "doSeq" decl_name% (isPseudoKind := true)) <|
    doSeqBracketed <|> doSeqIndent
+/-- `termBeforeDo` is defined as `withForbidden("do", term)`, which will parse a term but
+disallows `do` outside of a bracketing construct. This is used for parsers like `for _ in t do ...`
+or `unless t do ...`, where we do not want `t do ...` to be parsed as an application of `t` to a
+`do` block, which would otherwise be allowed. -/
 def termBeforeDo := withForbidden "do" termParser

 attribute [run_builtin_parser_attribute_hooks] doSeq termBeforeDo
--- a/src/Lean/Parser/Extra.lean
+++ b/src/Lean/Parser/Extra.lean
@ -21,15 +21,54 @@ attribute [run_builtin_parser_attribute_hooks]
  withOpen withOpenDecl
  dbgTraceState

+/-- The parser `optional(p)`, or `(p)?`, parses `p` if it succeeds,
+otherwise it succeeds with no value.
+
+Note that because `?` is a legal identifier character, one must write `(p)?` or `p ?` for
+it to parse correctly. `ident?` will not work; one must write `(ident)?` instead.
+
+This parser has arity 1: it produces a `nullKind` node containing either zero arguments
+(for the `none` case) or the list of arguments produced by `p`.
+(In particular, if `p` has arity 0 then the two cases are not differentiated!) -/
@[run_builtin_parser_attribute_hooks] def optional (p : Parser) : Parser :=
  optionalNoAntiquot (withAntiquotSpliceAndSuffix `optional p (symbol "?"))

+/-- The parser `many(p)`, or `p*`, repeats `p` until it fails, and returns the list of results.
+
+The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be
+automatically replaced by `group(p)` to ensure that it produces exactly 1 value.
+
+This parser has arity 1: it produces a `nullKind` node containing one argument for each
+invocation of `p` (or `group(p)`). -/
@[run_builtin_parser_attribute_hooks] def many (p : Parser) : Parser :=
  manyNoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*"))

+/-- The parser `many1(p)`, or `p+`, repeats `p` until it fails, and returns the list of results.
+`p` must succeed at least once, or this parser will fail.
+
+Note that this parser produces the same parse tree as the `many(p)` / `p*` combinator,
+and one matches both `p*` and `p+` using `$[ .. ]*` syntax in a syntax match.
+(There is no `$[ .. ]+` syntax.)
+
+The argument `p` is "auto-grouped", meaning that if the arity is greater than 1 it will be
+automatically replaced by `group(p)` to ensure that it produces exactly 1 value.
+
+This parser has arity 1: it produces a `nullKind` node containing one argument for each
+invocation of `p` (or `group(p)`). -/
@[run_builtin_parser_attribute_hooks] def many1 (p : Parser) : Parser :=
  many1NoAntiquot (withAntiquotSpliceAndSuffix `many p (symbol "*"))

+/-- The parser `ident` parses a single identifier, possibly with namespaces, such as `foo` or
+`bar.baz`. The identifier must not be a declared token, so for example it will not match `"def"`
+because `def` is a keyword token. Tokens are implicitly declared by using them in string literals
+in parser declarations, so `syntax foo := "bla"` will make `bla` no longer legal as an identifier.
+
+Identifiers can contain special characters or keywords if they are escaped using the `«»` characters:
+`«def»` is an identifier named `def`, and `«x»` is treated the same as `x`. This is useful for
+using disallowed characters in identifiers such as `«foo.bar».baz` or `«hello world»`.
+
+This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier.
+You can use `TSyntax.getId` to extract the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks] def ident : Parser :=
  withAntiquot (mkAntiquot "ident" identKind) identNoAntiquot

@ -37,30 +76,96 @@ attribute [run_builtin_parser_attribute_hooks]
@[run_builtin_parser_attribute_hooks] def rawIdent : Parser :=
  withAntiquot (mkAntiquot "ident" identKind) rawIdentNoAntiquot

+/-- The parser `hygieneInfo` parses no text, but captures the current macro scope information
+as though it parsed an identifier at the current position. It returns a `hygieneInfoKind` node
+around an `.ident` which is `Name.anonymous` but with macro scopes like a regular identifier.
+
+This is used to implement `have := ...` syntax: the `hygieneInfo` between the `have` and `:=`
+substitutes for the identifier which would normally go there as in `have x :=`, so that we
+can expand `have :=` to `have this :=` while retaining the usual macro name resolution behavior.
+See [doc/macro_overview.md](https://github.com/leanprover/lean4/blob/master/doc/macro_overview.md)
+for more information about macro hygiene.
+
+This parser has arity 1: it produces a `Syntax.ident` node containing the parsed identifier.
+You can use `TSyntax.getHygieneInfo` to extract the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks] def hygieneInfo : Parser :=
  withAntiquot (mkAntiquot "hygieneInfo" hygieneInfoKind (anonymous := false)) hygieneInfoNoAntiquot

+/-- The parser `num` parses a numeric literal in several bases:
+
+* Decimal: `129`
+* Hexadecimal: `0xdeadbeef`
+* Octal: `0o755`
+* Binary: `0b1101`
+
+This parser has arity 1: it produces a `numLitKind` node containing an atom with the text of the
+literal.
+You can use `TSyntax.getNat` to extract the number from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks] def numLit : Parser :=
  withAntiquot (mkAntiquot "num" numLitKind) numLitNoAntiquot

+/-- The parser `scientific` parses a scientific-notation literal, such as `1.3e-24`.
+
+This parser has arity 1: it produces a `scientificLitKind` node containing an atom with the text
+of the literal.
+You can use `TSyntax.getScientific` to extract the parts from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks] def scientificLit : Parser :=
  withAntiquot (mkAntiquot "scientific" scientificLitKind) scientificLitNoAntiquot

+/-- The parser `str` parses a string literal, such as `"foo"` or `"\r\n"`. Strings can contain
+C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode characters like `∈`.
+Newlines in a string are interpreted literally.
+
+This parser has arity 1: it produces a `strLitKind` node containing an atom with the raw
+literal (including the quote marks and without interpreting the escapes).
+You can use `TSyntax.getString` to decode the string from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks] def strLit : Parser :=
  withAntiquot (mkAntiquot "str" strLitKind) strLitNoAntiquot

+/-- The parser `char` parses a character literal, such as `'a'` or `'\n'`. Character literals can
+contain C-style escapes like `\n`, `\"`, `\x00` or `\u2665`, as well as literal unicode characters
+like `∈`, but must evaluate to a single unicode codepoint, so `'♥'` is allowed but `'❤️'` is not
+(since it is two codepoints but one grapheme cluster).
+
+This parser has arity 1: it produces a `charLitKind` node containing an atom with the raw
+literal (including the quote marks and without interpreting the escapes).
+You can use `TSyntax.getChar` to decode the string from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks] def charLit : Parser :=
  withAntiquot (mkAntiquot "char" charLitKind) charLitNoAntiquot

+/-- The parser `name` parses a name literal like `` `foo``. The syntax is the same as for identifiers
+(see `ident`) but with a leading backquote.
+
+This parser has arity 1: it produces a `nameLitKind` node containing the raw literal
+(including the backquote).
+You can use `TSyntax.getName` to extract the name from the resulting syntax object. -/
@[run_builtin_parser_attribute_hooks] def nameLit : Parser :=
  withAntiquot (mkAntiquot "name" nameLitKind) nameLitNoAntiquot

+/-- The parser `group(p)` parses the same thing as `p`, but it wraps the results in a `groupKind`
+node.
+
+This parser always has arity 1, even if `p` does not. Parsers like `p*` are automatically
+rewritten to `group(p)*` if `p` does not have arity 1, so that the results from separate invocations
+of `p` can be differentiated. -/
@[run_builtin_parser_attribute_hooks, inline] def group (p : Parser) : Parser :=
  node groupKind p

+/-- The parser `many1Indent(p)` is equivalent to `withPosition((colGe p)+)`. This has the effect of
+parsing one or more occurrences of `p`, where each subsequent `p` parse needs to be indented
+the same or more than the first parse.
+
+This parser has arity 1, and returns a list of the results from `p`.
+`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, inline] def many1Indent (p : Parser) : Parser :=
  withPosition $ many1 (checkColGe "irrelevant" >> p)

+/-- The parser `manyIndent(p)` is equivalent to `withPosition((colGe p)*)`. This has the effect of
+parsing zero or more occurrences of `p`, where each subsequent `p` parse needs to be indented
+the same or more than the first parse.
+
+This parser has arity 1, and returns a list of the results from `p`.
+`p` is "auto-grouped" if it is not arity 1. -/
@[run_builtin_parser_attribute_hooks, inline] def manyIndent (p : Parser) : Parser :=
  withPosition $ many (checkColGe "irrelevant" >> p)

--- a/src/Lean/Parser/StrInterpolation.lean
+++ b/src/Lean/Parser/StrInterpolation.lean
@ -58,6 +58,17 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s =>
  info := mkAtomicInfo "interpolatedStr"
 }

+/-- The parser `interpolatedStr(p)` parses a string literal like `"foo"` (see `str`), but the string
+may also contain `{}` escapes, and within the escapes the parser `p` is used. For example,
+`interpolatedStr(term)` will parse `"foo {2 + 2}"`, where `2 + 2` is parsed as a term rather than
+as a string. Note that the full Lean term grammar is available here, including string literals,
+so for example `"foo {"bar" ++ "baz"}"` is a legal interpolated string (which evaluates to
+`foo barbaz`).
+
+This parser has arity 1, and returns a `interpolatedStrKind` with an odd number of arguments,
+alternating between chunks of literal text and results from `p`. The literal chunks contain
+uninterpreted substrings of the input. For example, `"foo\n{2 + 2}"` would have three arguments:
+an atom `"foo\n{`, the parsed `2 + 2` term, and then the atom `}"`. -/
 def interpolatedStr (p : Parser) : Parser :=
  withAntiquot (mkAntiquot "interpolatedStr" interpolatedStrKind) $ interpolatedStrNoAntiquot p

--- a/src/Lean/Parser/Term.lean
+++ b/src/Lean/Parser/Term.lean
@ -18,6 +18,11 @@ def commentBody.parenthesizer := PrettyPrinter.Parenthesizer.visitToken
@[combinator_formatter commentBody]
 def commentBody.formatter := PrettyPrinter.Formatter.visitAtom Name.anonymous

+/-- A `docComment` parses a "documentation comment" like `/-- foo -/`. This is not treated like
+a regular comment (that is, as whitespace); it is parsed and forms part of the syntax tree structure.
+
+A `docComment` node contains a `/--` atom and then the remainder of the comment, `foo -/` in this
+example. Use `TSyntax.getDocString` to extract the body text from a doc string syntax node. -/
 def docComment := leading_parser
  ppDedent $ "/--" >> ppSpace >> commentBody >> ppLine
 end Command
@ -34,10 +39,28 @@ builtin_initialize

 namespace Tactic

+/-- `sepByIndentSemicolon(p)` parses a sequence of `p` optionally followed by `;`,
+similar to `manyIndent(p ";"?)`, except that if two occurrences of `p` occur on the same line,
+the `;` is mandatory. This is used by tactic parsing, so that
+```
+example := by
+  skip
+  skip
+```
+is legal, but `by skip skip` is not - it must be written as `by skip; skip`. -/
@[run_builtin_parser_attribute_hooks]
 def sepByIndentSemicolon (p : Parser) : Parser :=
  sepByIndent p "; " (allowTrailingSep := true)

+/-- `sepBy1IndentSemicolon(p)` parses a (nonempty) sequence of `p` optionally followed by `;`,
+similar to `many1Indent(p ";"?)`, except that if two occurrences of `p` occur on the same line,
+the `;` is mandatory. This is used by tactic parsing, so that
+```
+example := by
+  skip
+  skip
+```
+is legal, but `by skip skip` is not - it must be written as `by skip; skip`. -/
@[run_builtin_parser_attribute_hooks]
 def sepBy1IndentSemicolon (p : Parser) : Parser :=
  sepBy1Indent p "; " (allowTrailingSep := true)
@ -117,6 +140,8 @@ def optSemicolon (p : Parser) : Parser :=
 /-- A placeholder term, to be synthesized by unification. -/
@[builtin_term_parser] def hole := leading_parser
  "_"
+/-- Parses a "synthetic hole", that is, `?foo` or `?_`.
+This syntax is used to construct named metavariables. -/
@[builtin_term_parser] def syntheticHole := leading_parser
  "?" >> (ident <|> hole)
 def binderIdent : Parser  := ident <|> hole
@ -165,6 +190,8 @@ def optIdent : Parser :=
 def fromTerm   := leading_parser
  "from " >> termParser
 def showRhs := fromTerm <|> byTactic'
+/-- A `sufficesDecl` represents everything that comes after the `suffices` keyword:
+an optional `x :`, then a term `ty`, then `from val` or `by tac`. -/
 def sufficesDecl := leading_parser
  (atomic (group (binderIdent >> " : ")) <|> hygieneInfo) >> termParser >> ppSpace >> showRhs
@[builtin_term_parser] def «suffices» := leading_parser:leadPrec
@ -251,6 +278,12 @@ and solved by typeclass inference of the specified class.
 -/
 def instBinder := ppGroup <| leading_parser
  "[" >> withoutPosition (optIdent >> termParser) >> "]"
+/-- A `bracketedBinder` matches any kind of binder group that uses some kind of brackets:
+* An explicit binder like `(x y : A)`
+* An implicit binder like `{x y : A}`
+* A strict implicit binder, `⦃y z : A⦄` or its ASCII alternative `{{y z : A}}`
+* An instance binder `[A]` or `[x : A]` (multiple variables are not allowed here)
+-/
 def bracketedBinder (requireType := false) :=
  withAntiquot (mkAntiquot "bracketedBinder" decl_name% (isPseudoKind := true)) <|
    explicitBinder requireType <|> strictImplicitBinder requireType <|>
@ -297,6 +330,8 @@ instance : Coe (TSyntax ``matchAltExpr) (TSyntax ``matchAlt) where
 def matchAlts (rhsParser : Parser := termParser) : Parser :=
  leading_parser withPosition $ many1Indent (ppLine >> matchAlt rhsParser)

+/-- `matchDiscr` matches a "match discriminant", either `h : tm` or `tm`, used in `match` as
+`match h1 : e1, e2, h3 : e3 with ...`. -/
 def matchDiscr := leading_parser
  optional (atomic (ident >> " : ")) >> termParser

@ -419,9 +454,13 @@ def letPatDecl  := leading_parser (withAnonymousAntiquot := false)
 -/
 def letEqnsDecl := leading_parser (withAnonymousAntiquot := false)
  letIdLhs >> (" := " <|> matchAlts)
-- Remark: we disable anonymous antiquotations here to make sure
-- anonymous antiquotations (e.g., `$x`) are not `letDecl`
+/-- `letDecl` matches the body of a let declaration `let f x1 x2 := e`,
+`let pat := e` (where `pat` is an arbitrary term) or `let f | pat1 => e1 | pat2 => e2 ...`
+(a pattern matching declaration), except for the `let` keyword itself.
+`let rec` declarations are not handled here. -/
 def letDecl     := leading_parser (withAnonymousAntiquot := false)
+  -- Remark: we disable anonymous antiquotations here to make sure
+  -- anonymous antiquotations (e.g., `$x`) are not `letDecl`
  notFollowedBy (nonReservedSymbol "rec") "rec" >>
  (letIdDecl <|> letPatDecl <|> letEqnsDecl)
 /--
@ -472,6 +511,9 @@ def haveIdDecl   := leading_parser (withAnonymousAntiquot := false)
  atomic (haveIdLhs >> " := ") >> termParser
 def haveEqnsDecl := leading_parser (withAnonymousAntiquot := false)
  haveIdLhs >> matchAlts
+/-- `haveDecl` matches the body of a have declaration: `have := e`, `have f x1 x2 := e`,
+`have pat := e` (where `pat` is an arbitrary term) or `have f | pat1 => e1 | pat2 => e2 ...`
+(a pattern matching declaration), except for the `have` keyword itself. -/
 def haveDecl     := leading_parser (withAnonymousAntiquot := false)
  haveIdDecl <|> (ppSpace >> letPatDecl) <|> haveEqnsDecl
@[builtin_term_parser] def «have» := leading_parser:leadPrec
@ -479,13 +521,17 @@ def haveDecl     := leading_parser (withAnonymousAntiquot := false)

 def «scoped» := leading_parser "scoped "
 def «local»  := leading_parser "local "
+/-- `attrKind` matches `("scoped" <|> "local")?`, used before an attribute like `@[local simp]`. -/
 def attrKind := leading_parser optional («scoped» <|> «local»)
 def attrInstance     := ppGroup $ leading_parser attrKind >> attrParser

 def attributes       := leading_parser
  "@[" >> withoutPosition (sepBy1 attrInstance ", ") >> "] "
+/-- `letRecDecl` matches the body of a let-rec declaration: a doc comment, attributes, and then
+a let declaration without the `let` keyword, such as `/-- foo -/ @[simp] bar := 1`. -/
 def letRecDecl       := leading_parser
  optional Command.docComment >> optional «attributes» >> letDecl
+/-- `letRecDecls` matches `letRecDecl,+`, a comma-separated list of let-rec declarations (see `letRecDecl`). -/
 def letRecDecls      := leading_parser
  sepBy1 letRecDecl ", "
@[builtin_term_parser]
--- a/tests/lean/interactive/hover.lean.expected.out
+++ b/tests/lean/interactive/hover.lean.expected.out
@ -144,7 +144,7 @@
 {"start": {"line": 87, "character": 14}, "end": {"line": 87, "character": 36}},
 "contents":
 {"value":
-  "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\n*import Lean.Parser.Do*",
+  "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\nA `doSeq` is a sequence of `doElem`, the main argument after the `do` keyword and other\ndo elements that take blocks. It can either have the form `\"{\" (doElem \";\"?)* \"}\"` or\n`many1Indent (doElem \";\"?)`, where `many1Indent` ensures that all the items are at\nthe same or higher indentation level as the first line. \n***\n*import Lean.Parser.Do*",
  "kind": "markdown"}}
 {"textDocument": {"uri": "file://hover.lean"},
 "position": {"line": 89, "character": 30}}
@ -152,7 +152,7 @@
 {"start": {"line": 89, "character": 29}, "end": {"line": 89, "character": 34}},
 "contents":
 {"value":
-  "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\n*import Lean.Parser.Do*",
+  "```lean\nLean.Parser.Term.doSeq : Lean.Parser.Parser\n```\n***\nA `doSeq` is a sequence of `doElem`, the main argument after the `do` keyword and other\ndo elements that take blocks. It can either have the form `\"{\" (doElem \";\"?)* \"}\"` or\n`many1Indent (doElem \";\"?)`, where `many1Indent` ensures that all the items are at\nthe same or higher indentation level as the first line. \n***\n*import Lean.Parser.Do*",
  "kind": "markdown"}}
 {"textDocument": {"uri": "file://hover.lean"},
 "position": {"line": 92, "character": 2}}
@ -370,7 +370,12 @@
 "contents": {"value": "```lean\nn : ℕ\n```", "kind": "markdown"}}
 {"textDocument": {"uri": "file://hover.lean"},
 "position": {"line": 192, "character": 2}}
-null
+{"range":
+ {"start": {"line": 192, "character": 0}, "end": {"line": 192, "character": 9}},
+ "contents":
+ {"value":
+  "`declModifiers` is the collection of modifiers on a declaration:\n* a doc comment `/-! ... -/`\n* a list of attributes `@[attr1, attr2]`\n* a visibility specifier, `private` or `protected`\n* `noncomputable`\n* `unsafe`\n* `partial` or `nonrec`\n\nAll modifiers are optional, and have to come in the listed order.\n\n`nestedDeclModifiers` is the same as `declModifiers`, but attributes are printed\non the same line as the declaration. It is used for declarations nested inside other syntax,\nsuch as inductive constructors, structure projections, and `let rec` / `where` definitions. ",
+  "kind": "markdown"}}
 {"textDocument": {"uri": "file://hover.lean"},
 "position": {"line": 198, "character": 2}}
 {"range":