fix: error messages from Verso docstring parser (#12372)

This PR extensively reworks the Verso docstring parser so that it gives
much better parser errors that provide more useful guidance.

Closes #12063
This commit is contained in:
David Thrane Christiansen 2026-02-07 08:49:06 +01:00 committed by GitHub
parent 03dc334f73
commit 99b3ba12c9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 257 additions and 51 deletions

View file

@ -172,8 +172,8 @@ private def pushColumn : ParserFn := fun c s =>
private def guardColumn (p : Nat → Bool) (message : String) : ParserFn := fun c s =>
if p (c.currentColumn s) then s else s.mkErrorAt message s.pos
private def guardMinColumn (min : Nat) : ParserFn :=
guardColumn (· ≥ min) s!"expected column at least {min}"
private def guardMinColumn (min : Nat) (description : String := s!"expected column at least {min}") : ParserFn :=
guardColumn (· ≥ min) description
private def withCurrentColumn (p : Nat → ParserFn) : ParserFn := fun c s =>
p (c.currentColumn s) c s
@ -204,7 +204,7 @@ private def onlyBlockOpeners : ParserFn := fun c s =>
true
if ok then s
else s.mkErrorAt s!"beginning of line or sequence of nestable block openers at {position}" s.pos
else s.mkErrorAt "beginning of line or sequence of nestable block openers" s.pos
private def nl := satisfyFn (· == '\n') "newline"
@ -343,7 +343,9 @@ public def inlineTextChar : ParserFn := fun c s =>
let i := s.pos
if h : c.atEnd i then s.mkEOIError
else s.next' c i h
| '*' | '_' | '\n' | '[' | ']' | '{' | '}' | '`' => s.mkUnexpectedErrorAt s!"'{curr}'" i
| '\n' => s.mkUnexpectedErrorAt "unexpected newline" i
| '*' | '_' | '[' | ']' | '{' | '}' | '`' =>
(s.setPos i).mkUnexpectedError s!"unexpected '{curr}' (use '\\{curr}' to escape)" ["text"]
| '!' =>
let s := s.next' c i h
let i' := s.pos
@ -441,6 +443,20 @@ public def recoverBlock (p : ParserFn) (final : ParserFn := skipFn) : ParserFn :
recoverFn p fun _ =>
ignoreFn skipBlock >> final
-- Like `recoverBlock` but stores recovered errors at the original error position.
private def recoverBlockAtErrPos (p : ParserFn) : ParserFn := fun c s =>
let s := p c s
if let some msg := s.errorMsg then
let errPos := s.pos
let s' := (ignoreFn skipBlock) c {s with errorMsg := none}
if s'.hasError then s
else {s with
pos := s'.pos,
errorMsg := none,
stxStack := s'.stxStack,
recoveredErrors := s.recoveredErrors.push (errPos, s'.stxStack, msg)}
else s
private def recoverBlockWith (stxs : Array Syntax) (p : ParserFn) : ParserFn :=
recoverFn p fun rctx =>
ignoreFn skipBlock >>
@ -476,6 +492,35 @@ private def recoverEolWith (stxs : Array Syntax) (p : ParserFn) : ParserFn :=
show ParserFn from
fun _ s => stxs.foldl (init := s.shrinkStack rctx.initialSize) (·.pushSyntax ·)
-- Like `recoverEol` but stores recovered errors at the original error position
-- rather than the post-recovery position.
private def recoverEolAtErrPos (p : ParserFn) : ParserFn := fun c s =>
let s := p c s
if let some msg := s.errorMsg then
let errPos := s.pos
let s' := (ignoreFn skipToNewline) c {s with errorMsg := none}
if s'.hasError then s
else {s with
pos := s'.pos,
errorMsg := none,
stxStack := s'.stxStack,
recoveredErrors := s.recoveredErrors.push (errPos, s'.stxStack, msg)}
else s
-- Like `recoverEolWith` but stores recovered errors at the original error position
-- rather than the post-recovery position.
private def recoverEolWithAtErrPos (stxs : Array Syntax) (p : ParserFn) : ParserFn := fun c s =>
let iniSz := s.stxStack.size
let s := p c s
if let some msg := s.errorMsg then
let errPos := s.pos
let s' := (ignoreFn skipToNewline) c {s with errorMsg := none}
if s'.hasError then s
else
let s' := stxs.foldl (init := s'.shrinkStack iniSz) (·.pushSyntax ·)
{s' with recoveredErrors := s.recoveredErrors.push (errPos, s'.stxStack, msg)}
else s
private def recoverSkip (p : ParserFn) : ParserFn :=
recoverFn p fun _ => skipFn
@ -547,9 +592,22 @@ private def nameArgWhitespace : (multiline : Option Nat) → ParserFn
public def args (multiline : Option Nat := none) : ParserFn :=
sepByFn true arg (nameArgWhitespace multiline)
/--
Replaces any error from `p` at the initial position with `expected msg`. This ensures that
each sub-parser of `delimitedInline` contributes a clear expected-token name, and clears
unhelpful generic "unexpected" messages from inner parsers so that the more informative message
from `inlineTextChar` survives error merging via `<|>`.
-/
private def expectedFn (msg : String) (p : ParserFn) : ParserFn := fun c s =>
let iniPos := s.pos
let s := p c s
if s.hasError && s.pos == iniPos then
s.setError { expected := [msg] }
else s
/-- Parses a name and zero or more arguments to a role, directive, command, or code block. -/
public def nameAndArgs (multiline : Option Nat := none) : ParserFn :=
nameArgWhitespace multiline >> rawIdentFn (includeWhitespace := false) >>
nameArgWhitespace multiline >> expectedFn "identifier" (rawIdentFn (includeWhitespace := false)) >>
nameArgWhitespace multiline >> args (multiline := multiline)
/--
@ -594,6 +652,13 @@ def linebreak (ctxt : InlineCtxt) : ParserFn :=
private partial def notInLink (ctxt : InlineCtxt) : ParserFn := fun _ s =>
if ctxt.inLink then s.mkError "Already in a link" else s
-- Like `satisfyFn (· == '\n')` but with a better error message that mentions what was expected.
private def newlineOrUnexpected (msg : String) : ParserFn := fun c s =>
let i := s.pos
if h : c.atEnd i then s.mkEOIError
else if c.get' i h == '\n' then s.next' c i h
else s.mkUnexpectedError s!"unexpected '{c.get' i h}'" [msg]
mutual
private partial def emphLike
(name : SyntaxNodeKind) (char : Char) (what plural : String)
@ -734,10 +799,17 @@ mutual
nodeFn `str (asStringFn (quoted := true) (many1Fn (satisfyEscFn (fun c => c != ']' && c != '\n') "other than ']' or newline"))) >>
strFn "]")
private partial def linkTarget := ref <|> url
private partial def linkTarget : ParserFn := fun c s =>
let s := (ref <|> url) c s
if s.hasError then
match s.errorMsg with
| some e => s.setError { e with
expected := ["link target '(url)' or '[ref]' (use '\\[' for a literal '[')"] }
| none => s
else s
where
notUrlEnd := satisfyEscFn (· ∉ ")\n".toList) "not ')' or newline" >> takeUntilEscFn (· ∈ ")\n".toList)
notRefEnd := satisfyEscFn (· ∉ "]\n".toList) "not ']' or newline" >> takeUntilEscFn (· ∈ "]\n".toList)
notUrlEnd := expectedFn "URL" (satisfyEscFn (· ∉ ")\n".toList)) >> takeUntilEscFn (· ∈ ")\n".toList)
notRefEnd := expectedFn "reference name" (satisfyEscFn (· ∉ "]\n".toList)) >> takeUntilEscFn (· ∈ "]\n".toList)
ref : ParserFn :=
nodeFn ``Syntax.ref <|
(atomicFn <| strFn "[") >>
@ -761,7 +833,13 @@ mutual
nodeFn ``role <|
intro >> (bracketed <|> atomicFn nonBracketed)
where
intro := atomicFn (chFn '{') >> recoverBlock (eatSpaces >> nameAndArgs >> eatSpaces >> chFn '}')
intro := atomicFn (chFn '{') >> recoverBlockAtErrPos (eatSpaces >> nameAndArgs >> eatSpaces >>
rawFn (fun c s =>
let i := s.pos
if h : c.atEnd i then s.mkEOIError [closeMsg]
else if c.get' i h == '}' then s.next' c i h
else (s.setPos i).mkUnexpectedError s!"unexpected '{c.get' i h}'" [closeMsg]) false)
closeMsg := "positional argument, named argument, flag, or '}' (use '\\{' for a literal '{')"
bracketed := atomicFn (chFn '[') >> recoverBlock (manyFn (inline ctxt) >> chFn ']')
fakeOpen := .atom SourceInfo.none "["
fakeClose := .atom SourceInfo.none "]"
@ -774,14 +852,15 @@ mutual
Parses an inline that is self-delimiting (that is, with well-defined start and stop characters).
-/
public partial def delimitedInline (ctxt : InlineCtxt) : ParserFn :=
emph ctxt <|> bold ctxt <|> code <|> math <|> role ctxt <|> image <|>
expectedFn "'_'" (emph ctxt) <|> expectedFn "'*'" (bold ctxt) <|>
expectedFn "'`'" code <|> math <|> expectedFn "'{'" (role ctxt) <|> image <|>
link ctxt <|> footnote ctxt
/--
Parses any inline element.
-/
public partial def inline (ctxt : InlineCtxt) : ParserFn :=
text <|> linebreak ctxt <|> delimitedInline ctxt
text <|> expectedFn "newline" (linebreak ctxt) <|> delimitedInline ctxt
end
/--
@ -1004,6 +1083,7 @@ private def recoverUnindent (indent : Nat) (p : ParserFn) (finish : ParserFn :=
ParserFn :=
recoverFn p (fun _ => ignoreFn (skipUntilDedent indent) >> finish)
private def blockSep := ignoreFn (manyFn blankLine >> optionalFn endLine)
mutual
@ -1080,7 +1160,7 @@ mutual
/-- Parses a paragraph (that is, a sequence of otherwise-undecorated inlines). -/
public partial def para (ctxt : BlockCtxt) : ParserFn :=
nodeFn ``para <|
atomicFn (takeWhileFn (· == ' ') >> notFollowedByFn blockOpener "block opener" >> guardMinColumn ctxt.minIndent) >>
atomicFn (takeWhileFn (· == ' ') >> notFollowedByFn blockOpener "block opener" >> guardMinColumn ctxt.minIndent s!"paragraph indented at least {ctxt.minIndent}") >>
fakeAtomHere "para{" >>
textLine >>
fakeAtomHere "}"
@ -1128,7 +1208,7 @@ mutual
let fenceWidth := c' - c
takeWhileFn (· == ' ') >>
optionalFn nameAndArgs >>
asStringFn (satisfyFn (· == '\n') "newline") >>
asStringFn (recoverEolAtErrPos (newlineOrUnexpected "positional argument, named argument, flag, or newline")) >>
nodeFn strLitKind (asStringFn (manyFn (atomicFn blankLine <|> codeFrom c fenceWidth)) (transform := deIndent c) (quoted := true)) >>
closeFence c fenceWidth
where
@ -1170,7 +1250,7 @@ mutual
asStringFn (atLeastFn 3 (skipChFn ':')) >>
guardOpenerSize >>
eatSpaces >>
recoverEolWith #[.missing, .node .none nullKind #[]] (nameAndArgs >> satisfyFn (· == '\n') "newline")) >>
recoverEolWithAtErrPos #[.missing, .node .none nullKind #[]] (nameAndArgs >> newlineOrUnexpected "positional argument, named argument, flag, or newline")) >>
fakeAtom "\n" >>
ignoreFn (manyFn blankLine) >>
(withFencePos 3 fun ⟨l, col⟩ =>
@ -1266,7 +1346,11 @@ mutual
Parses a block.
-/
public partial def block (c : BlockCtxt) : ParserFn :=
block_command c <|> unorderedList c <|> orderedList c <|> definitionList c <|> header c <|> codeBlock c <|> directive c <|> blockquote c <|> linkRef c <|> footnoteRef c <|> para c <|> metadataBlock c
expectedFn "block opener (at line start: '#', '>', ':', '*', '-', '+', '1.', '```', '%%%', '{..}')" (
block_command c <|> unorderedList c <|> orderedList c <|> definitionList c <|>
header c <|> codeBlock c <|> directive c <|> blockquote c <|>
linkRef c <|> footnoteRef c <|> metadataBlock c) <|>
para c
/--
Parses zero or more blocks.

View file

@ -1,7 +1,7 @@
2 failures:
@36 (⟨3, 28⟩): expected token
""
@36 (⟨3, 28⟩): unexpected end of input; expected '![', '$$', '$', '[' or '[^'
@1 (⟨1, 1⟩): expected identifier
"\ntest}\nHere's a modified paragraph."
@36 (⟨3, 28⟩): unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
""
Final stack:

View file

@ -1,7 +1,7 @@
2 failures:
@37 (⟨3, 28⟩): expected token
""
@37 (⟨3, 28⟩): unexpected end of input; expected '![', '$$', '$', '[' or '[^'
@1 (⟨1, 1⟩): expected identifier
"\n test}\nHere's a modified paragraph."
@37 (⟨3, 28⟩): unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
""
Final stack:

View file

@ -1,7 +1,7 @@
2 failures:
@44 (⟨4, 28⟩): expected token
""
@44 (⟨4, 28⟩): unexpected end of input; expected '![', '$$', '$', '[' or '[^'
@1 (⟨1, 1⟩): expected identifier
"\n test\narg}\nHere's a modified paragraph."
@44 (⟨4, 28⟩): unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
""
Final stack:

View file

@ -1,7 +1,7 @@
2 failures:
@45 (⟨4, 28⟩): expected token
""
@45 (⟨4, 28⟩): unexpected end of input; expected '![', '$$', '$', '[' or '[^'
@1 (⟨1, 1⟩): expected identifier
"\n test\n arg}\nHere's a modified paragraph."
@45 (⟨4, 28⟩): unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
""
Final stack:

View file

@ -1,7 +1,7 @@
2 failures:
@19 (⟨6, 0⟩): '{'; expected '![', '$$', '$', '[' or '[^'
"Here's a paragraph."
@19 (⟨6, 0⟩): expected token
@1 (⟨1, 1⟩): expected identifier
"\n test\n arg}\n\n\nHere's a paragraph."
@19 (⟨6, 0⟩): '['; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
"Here's a paragraph."
Final stack:

View file

@ -1,11 +1,9 @@
Failure @2 (⟨2, 0⟩): ':'; expected %%% (at line beginning) or expected column at least 1
Failure @2 (⟨2, 0⟩): expected block opener (at line start: '#', '>', ':', '*', '-', '+', '1.', '```', '%%%', '{..}') or paragraph indented at least 1
Final stack:
[(Lean.Doc.Syntax.ul
"ul{"
[(Lean.Doc.Syntax.li
"*"
[(Lean.Doc.Syntax.metadata_block
<missing>
<missing>)
[(Lean.Doc.Syntax.para <missing>)
<missing>])])]
Remaining: "abc"

View file

@ -1,4 +1,4 @@
Failure @35 (⟨2, 15⟩): unexpected end of input; expected %%% (at line beginning), '![', '$$', '$', '[', '[^', beginning of line at ⟨2, 15⟩ or beginning of line or sequence of nestable block openers at ⟨2, 15⟩
Failure @35 (⟨2, 15⟩): unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`', '{', block opener (at line start: '#', '>', ':', '*', '-', '+', '1.', '```', '%%%', '{..}') or newline
Final stack:
[(Lean.Doc.Syntax.dl
"dl{"
@ -12,7 +12,9 @@ Final stack:
(Lean.Doc.Syntax.text
(str "\"Let's say more!\""))]
"=>"
[(Lean.Doc.Syntax.metadata_block <missing>)
[(Lean.Doc.Syntax.para
"para{"
[(Lean.Doc.Syntax.footnote <missing>)])
<missing>])]
"}")]
Remaining: ""

View file

@ -1,5 +1,5 @@
3 failures:
@73 (⟨9, 3⟩): expected token
@73 (⟨9, 3⟩): expected identifier
"\n"
@74 (⟨10, 0⟩): expected closing ':::' for directive from line 9
""

View file

@ -1,7 +1,7 @@
3 failures:
@68 (⟨8, 0⟩): expected closing ':::' from directive on line 3 at column 0, but it's at column 1
" :::\n"
@72 (⟨8, 4⟩): expected token
@72 (⟨8, 4⟩): expected identifier
"\n"
@73 (⟨9, 0⟩): expected closing ':::' for directive from line 8
""

View file

@ -1,4 +1,4 @@
Failure @45 (⟨2, 29⟩): expected '(' or '['
Failure @45 (⟨2, 29⟩): expected link target '(url)' or '[ref]' (use '\[' for a literal '[')
Final stack:
[(Lean.Doc.Syntax.para
"para{"

View file

@ -1,4 +1,4 @@
Failure @0 (⟨1, 0⟩): '['
Failure @0 (⟨1, 0⟩): unexpected '[' (use '\[' to escape); expected text
Final stack:
<missing>
Remaining: "[abc"

View file

@ -1,4 +1,4 @@
Failure @0 (⟨1, 0⟩): '*'
Failure @0 (⟨1, 0⟩): unexpected '*' (use '\*' to escape); expected text
Final stack:
[<missing>]
Remaining: "*"

View file

@ -1,4 +1,4 @@
Failure @19 (⟨1, 19⟩): expected '(' or '['
Failure @19 (⟨1, 19⟩): expected link target '(url)' or '[ref]' (use '\[' for a literal '[')
Final stack:
(Lean.Doc.Syntax.image
"!["

View file

@ -1,4 +1,4 @@
Failure @55 (⟨3, 0⟩): '{'; expected '![', '$$', '$', '[' or '[^'
Failure @55 (⟨3, 0⟩): '['; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
Final stack:
(Lean.Doc.Syntax.para
"para{"

View file

@ -1,4 +1,4 @@
Failure @92 (⟨7, 0⟩): '{'; expected '![', '$$', '$', '[' or '[^'
Failure @92 (⟨7, 0⟩): '['; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
Final stack:
[(Lean.Doc.Syntax.dl
"dl{"

View file

@ -8,11 +8,13 @@ This test ensures that syntax errors in Verso docs are appropriately reported.
-- Syntax error in module docstring should report actual error location
/--
error: '}'
@ +2:40...*
error: unexpected '`'; expected positional argument, named argument, flag, or '}' (use '\{' for a literal '{')
---
error: unexpected end of input; expected '![', '$$', '$', '[' or '[^'
@ +3:0...*
error: unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
-/
#guard_msgs in
#guard_msgs (positions := true) in
/-!
Here is text with an unclosed role {name`Nat
-/
@ -44,10 +46,130 @@ A b c d e f.
-/
def x := 5
-- Issue #12063: bare curly brace should suggest escaping
/--
@ +2:8...*
error: unexpected '='; expected positional argument, named argument, flag, or '}' (use '\{' for a literal '{')
---
@ +3:0...*
error: unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
-/
#guard_msgs (positions := true) in
/-!
{module =checked}
-/
/--
@ +2:7...*
error: unexpected '='; expected positional argument, named argument, flag, or newline
-/
#guard_msgs (positions := true) in
/-!
```foo =thing
```
-/
/--
@ +2:5...*
error: unexpected '='; expected positional argument, named argument, flag, or '}' (use '\{' for a literal '{')
---
@ +3:0...*
error: unexpected end of input; expected '![', '$$', '$', '*', '[', '[^', '_', '`' or '{'
-/
#guard_msgs (positions := true) in
/-!
{foo =thing}[]
-/
/--
@ +2:7...*
error: unexpected '='; expected positional argument, named argument, flag, or newline
-/
#guard_msgs (positions := true) in
/-!
:::foo =thing
:::
-/
-- Issue #12063: link target should suggest escaping
/--
@ +2:24...*
error: expected link target '(url)' or '[ref]' (use '\[' for a literal '[')
-/
#guard_msgs (positions := true) in
/-!
[`rigid` --> `flexible`]
-/
-- Escaped special characters should parse without errors
/-!
Use \{ and \} for literal braces.
Use \[ and \] for literal brackets.
Use \* and \_ for literal asterisks and underscores.
-/
/--
@ +2:25...*
error: expected URL
-/
#guard_msgs (positions := true) in
/-!
[`rigid` --> `flexible`](
-/
/--
@ +2:26...*
error: expected URL
-/
#guard_msgs (positions := true) in
/-!
[`rigid` --> `flexible`]()
-/
/--
@ +2:32...*
error: expected ')'
-/
#guard_msgs (positions := true) in
/-!
[`rigid` --> `flexible`](http://
-/
/--
@ +2:25...*
error: expected reference name
-/
#guard_msgs (positions := true) in
/-!
[`rigid` --> `flexible`][
-/
/--
@ +2:26...*
error: expected reference name
-/
#guard_msgs (positions := true) in
/-!
[`rigid` --> `flexible`][]
-/
/--
@ +2:28...*
error: expected ']'
-/
#guard_msgs (positions := true) in
/-!
[`rigid` --> `flexible`][xyz
-/
-- Unmatched closing bracket in docstring (issue #12118)
/--
@ +2:0...*
error: '{'; expected %%% (at line beginning), '![', '$$', '$', '[', '[^', *, + or -
error: unexpected '}' (use '\}' to escape); expected '![', '$$', '$', '*', '[', '[^', '_', '`', '{', block opener (at line start: '#', '>', ':', '*', '-', '+', '1.', '```', '%%%', '{..}'), newline or text
-/
#guard_msgs (positions := true) in
/--
@ -58,7 +180,7 @@ def z := 0
-- Unmatched closing bracket in module docstring
/--
@ +2:0...*
error: '{'; expected %%% (at line beginning), '![', '$$', '$', '[', '[^', *, + or -
error: unexpected '}' (use '\}' to escape); expected '![', '$$', '$', '*', '[', '[^', '_', '`', '{', block opener (at line start: '#', '>', ':', '*', '-', '+', '1.', '```', '%%%', '{..}'), newline or text
-/
#guard_msgs (positions := true) in
/-!
@ -68,7 +190,7 @@ error: '{'; expected %%% (at line beginning), '![', '$$', '$', '[', '[^', *, + o
-- Unmatched closing square bracket in docstring
/--
@ +2:0...*
error: '{'; expected %%% (at line beginning), '![', '$$', '$', '[', '[^', *, + or -
error: unexpected ']' (use '\]' to escape); expected '![', '$$', '$', '*', '[', '[^', '_', '`', '{', block opener (at line start: '#', '>', ':', '*', '-', '+', '1.', '```', '%%%', '{..}'), newline or text
-/
#guard_msgs (positions := true) in
/--
@ -79,7 +201,7 @@ def w := 0
-- Unmatched closing square bracket in module docstring
/--
@ +2:0...*
error: '{'; expected %%% (at line beginning), '![', '$$', '$', '[', '[^', *, + or -
error: unexpected ']' (use '\]' to escape); expected '![', '$$', '$', '*', '[', '[^', '_', '`', '{', block opener (at line start: '#', '>', ':', '*', '-', '+', '1.', '```', '%%%', '{..}'), newline or text
-/
#guard_msgs (positions := true) in
/-!