feat: custom error recovery in parser (#3413)
Adds a simple error-recovery mechanism to Lean's parser, similar to those used in other combinator parsing libraries. Lean itself isn't very amenable to error recovery with this mechanism, as it requires global knowledge of the grammar in question to write recovery rules that don't break backtracking or `<|>`. I only found a few opportunities. But for DSLs, this is really important. In particular, Verso parse errors interacted very badly with Lean parse errors in a way that required frequent "restart file" commands, but this mechanism allows me to both recover from Verso parse errors and to have Lean skip the rest of the file rather than repeatedly trying to parse it as Lean commands.
This commit is contained in:
parent
0fb936158b
commit
74e7886ce7
12 changed files with 304 additions and 45 deletions
|
|
@ -47,6 +47,7 @@ builtin_initialize
|
|||
register_parser_alias (kind := interpolatedStrKind) interpolatedStr
|
||||
register_parser_alias orelse
|
||||
register_parser_alias andthen { stackSz? := none }
|
||||
register_parser_alias recover
|
||||
|
||||
registerAlias "notFollowedBy" ``notFollowedBy (notFollowedBy · "element")
|
||||
Parenthesizer.registerAlias "notFollowedBy" notFollowedBy.parenthesizer
|
||||
|
|
|
|||
|
|
@ -203,8 +203,9 @@ def trailingNode (n : SyntaxNodeKind) (prec lhsPrec : Nat) (p : Parser) : Traili
|
|||
|
||||
def mergeOrElseErrors (s : ParserState) (error1 : Error) (iniPos : String.Pos) (mergeErrors : Bool) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, cache, some error2⟩ =>
|
||||
if pos == iniPos then ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2)⟩
|
||||
| ⟨stack, lhsPrec, pos, cache, some error2, errs⟩ =>
|
||||
if pos == iniPos then
|
||||
⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2), errs⟩
|
||||
else s
|
||||
| other => other
|
||||
|
||||
|
|
@ -285,7 +286,7 @@ instance : OrElse Parser where
|
|||
def atomicFn (p : ParserFn) : ParserFn := fun c s =>
|
||||
let iniPos := s.pos
|
||||
match p c s with
|
||||
| ⟨stack, lhsPrec, _, cache, some msg⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg⟩
|
||||
| ⟨stack, lhsPrec, _, cache, some msg, errs⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg, errs⟩
|
||||
| other => other
|
||||
|
||||
/-- The `atomic(p)` parser parses `p`, returns the same result as `p` and fails iff `p` fails,
|
||||
|
|
@ -296,6 +297,58 @@ This is important for the `p <|> q` combinator, because it is not backtracking,
|
|||
This parser has the same arity as `p` - it produces the same result as `p`. -/
|
||||
def atomic : Parser → Parser := withFn atomicFn
|
||||
|
||||
/-- Information about the state of the parse prior to the failing parser's execution -/
|
||||
structure RecoveryContext where
|
||||
/-- The position prior to the failing parser -/
|
||||
initialPos : String.Pos
|
||||
/-- The syntax stack height prior to the failing parser's execution -/
|
||||
initialSize : Nat
|
||||
deriving BEq, DecidableEq, Repr
|
||||
|
||||
/--
|
||||
Recover from errors in `p` using `recover` to consume input until a known-good state has appeared.
|
||||
If `recover` fails itself, then no recovery is performed.
|
||||
|
||||
`recover` is provided with information about the failing parser's effects , and it is run in the
|
||||
state immediately after the failure. -/
|
||||
def recoverFn (p : ParserFn) (recover : RecoveryContext → ParserFn) : ParserFn := fun c s =>
|
||||
let iniPos := s.pos
|
||||
let iniSz := s.stxStack.size
|
||||
let s' := p c s
|
||||
if let some msg := s'.errorMsg then
|
||||
let s' := recover ⟨iniPos, iniSz⟩ c {s' with errorMsg := none}
|
||||
if s'.hasError then s'
|
||||
else {s' with
|
||||
pos := s'.pos,
|
||||
lhsPrec := s'.lhsPrec,
|
||||
cache := s'.cache,
|
||||
errorMsg := none,
|
||||
recoveredErrors := s'.recoveredErrors.push (s'.pos, s'.stxStack, msg) }
|
||||
else s'
|
||||
|
||||
/--
|
||||
Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared.
|
||||
If `handler` fails itself, then no recovery is performed.
|
||||
|
||||
`handler` is provided with information about the failing parser's effects , and it is run in the
|
||||
state immediately after the failure.
|
||||
|
||||
The interactions between <|> and `recover'` are subtle, especially for syntactic
|
||||
categories that admit user extension. Consider avoiding it in these cases. -/
|
||||
def recover' (parser : Parser) (handler : RecoveryContext → Parser) : Parser where
|
||||
info := parser.info
|
||||
fn := recoverFn parser.fn fun s => handler s |>.fn
|
||||
|
||||
/--
|
||||
Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared.
|
||||
If `handler` fails itself, then no recovery is performed.
|
||||
|
||||
`handler` is run in the state immediately after the failure.
|
||||
|
||||
The interactions between <|> and `recover` are subtle, especially for syntactic
|
||||
categories that admit user extension. Consider avoiding it in these cases. -/
|
||||
def recover (parser handler : Parser) : Parser := recover' parser fun _ => handler
|
||||
|
||||
def optionalFn (p : ParserFn) : ParserFn := fun c s =>
|
||||
let iniSz := s.stackSize
|
||||
let iniPos := s.pos
|
||||
|
|
@ -956,11 +1009,11 @@ private def tokenFnAux : ParserFn := fun c s =>
|
|||
private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState :=
|
||||
-- do not cache token parsing errors, which are rare and usually fatal and thus not worth an extra field in `TokenCache`
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none⟩ =>
|
||||
| ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none, errs⟩ =>
|
||||
if stack.size == 0 then s
|
||||
else
|
||||
let tk := stack.back
|
||||
⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none⟩
|
||||
⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none, errs⟩
|
||||
| other => other
|
||||
|
||||
def tokenFn (expected : List String := []) : ParserFn := fun c s =>
|
||||
|
|
@ -1259,21 +1312,24 @@ def keepTop (s : SyntaxStack) (startStackSize : Nat) : SyntaxStack :=
|
|||
|
||||
def keepNewError (s : ParserState) (oldStackSize : Nat) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, cache, err⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err⟩
|
||||
| ⟨stack, lhsPrec, pos, cache, err, errs⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err, errs⟩
|
||||
|
||||
def keepPrevError (s : ParserState) (oldStackSize : Nat) (oldStopPos : String.Pos) (oldError : Option Error) (oldLhsPrec : Nat) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, _, _, cache, _⟩ => ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError⟩
|
||||
| ⟨stack, _, _, cache, _, errs⟩ =>
|
||||
⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError, errs⟩
|
||||
|
||||
def mergeErrors (s : ParserState) (oldStackSize : Nat) (oldError : Error) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, cache, some err⟩ =>
|
||||
⟨stack.shrink oldStackSize, lhsPrec, pos, cache, if oldError == err then some err else some (oldError.merge err)⟩
|
||||
| ⟨stack, lhsPrec, pos, cache, some err, errs⟩ =>
|
||||
let newError := if oldError == err then err else oldError.merge err
|
||||
⟨stack.shrink oldStackSize, lhsPrec, pos, cache, some newError, errs⟩
|
||||
| other => other
|
||||
|
||||
def keepLatest (s : ParserState) (startStackSize : Nat) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none⟩
|
||||
| ⟨stack, lhsPrec, pos, cache, _, errs⟩ =>
|
||||
⟨keepTop stack startStackSize, lhsPrec, pos, cache, none, errs⟩
|
||||
|
||||
def replaceLongest (s : ParserState) (startStackSize : Nat) : ParserState :=
|
||||
s.keepLatest startStackSize
|
||||
|
|
|
|||
|
|
@ -18,6 +18,21 @@ namespace Parser
|
|||
|
||||
namespace Command
|
||||
|
||||
/-- Skip input until the next character that satisfies the predicate, then skip whitespace -/
|
||||
private def skipUntil (pred : Char → Bool) : Parser where
|
||||
fn :=
|
||||
andthenFn
|
||||
(takeUntilFn pred)
|
||||
(takeWhileFn Char.isWhitespace)
|
||||
|
||||
/-- Skip input until the next whitespace character (used for error recovery for certain tokens) -/
|
||||
private def skipUntilWs : Parser := skipUntil Char.isWhitespace
|
||||
|
||||
/-- Skip input until the next whitespace character or delimiter (used for error recovery for certain
|
||||
tokens, especially names that occur in signatures) -/
|
||||
private def skipUntilWsOrDelim : Parser := skipUntil fun c =>
|
||||
c.isWhitespace || c == '(' || c == ')' || c == ':' || c == '{' || c == '}' || c == '|'
|
||||
|
||||
/--
|
||||
Syntax quotation for (sequences of) commands.
|
||||
The identical syntax for term quotations takes priority,
|
||||
|
|
@ -68,7 +83,7 @@ def declModifiers (inline : Bool) := leading_parser
|
|||
optional («partial» <|> «nonrec»)
|
||||
/-- `declId` matches `foo` or `foo.{u,v}`: an identifier possibly followed by a list of universe names -/
|
||||
def declId := leading_parser
|
||||
ident >> optional (".{" >> sepBy1 ident ", " >> "}")
|
||||
ident >> optional (".{" >> sepBy1 (recover ident (skipUntil (fun c => c.isWhitespace || c ∈ [',', '}']))) ", " >> "}")
|
||||
/-- `declSig` matches the signature of a declaration with required type: a list of binders and then `: type` -/
|
||||
def declSig := leading_parser
|
||||
many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.typeSpec
|
||||
|
|
@ -100,18 +115,18 @@ def «abbrev» := leading_parser
|
|||
def optDefDeriving :=
|
||||
optional (ppDedent ppLine >> atomic ("deriving " >> notSymbol "instance") >> sepBy1 ident ", ")
|
||||
def «def» := leading_parser
|
||||
"def " >> declId >> ppIndent optDeclSig >> declVal >> optDefDeriving
|
||||
"def " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> declVal >> optDefDeriving
|
||||
def «theorem» := leading_parser
|
||||
"theorem " >> declId >> ppIndent declSig >> declVal
|
||||
"theorem " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> declVal
|
||||
def «opaque» := leading_parser
|
||||
"opaque " >> declId >> ppIndent declSig >> optional declValSimple
|
||||
"opaque " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> optional declValSimple
|
||||
/- As `declSig` starts with a space, "instance" does not need a trailing space
|
||||
if we put `ppSpace` in the optional fragments. -/
|
||||
def «instance» := leading_parser
|
||||
Term.attrKind >> "instance" >> optNamedPrio >>
|
||||
optional (ppSpace >> declId) >> ppIndent declSig >> declVal
|
||||
def «axiom» := leading_parser
|
||||
"axiom " >> declId >> ppIndent declSig
|
||||
"axiom " >> recover declId skipUntilWsOrDelim >> ppIndent declSig
|
||||
/- As `declSig` starts with a space, "example" does not need a trailing space. -/
|
||||
def «example» := leading_parser
|
||||
"example" >> ppIndent optDeclSig >> declVal
|
||||
|
|
@ -144,11 +159,11 @@ or an element `head : α` followed by a list `tail : List α`.
|
|||
For more information about [inductive types](https://lean-lang.org/theorem_proving_in_lean4/inductive_types.html).
|
||||
-/
|
||||
def «inductive» := leading_parser
|
||||
"inductive " >> declId >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >>
|
||||
"inductive " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >>
|
||||
many ctor >> optional (ppDedent ppLine >> computedFields) >> optDeriving
|
||||
def classInductive := leading_parser
|
||||
atomic (group (symbol "class " >> "inductive ")) >>
|
||||
declId >> ppIndent optDeclSig >>
|
||||
recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >>
|
||||
optional (symbol " :=" <|> " where") >> many ctor >> optDeriving
|
||||
def structExplicitBinder := leading_parser
|
||||
atomic (declModifiers true >> "(") >>
|
||||
|
|
@ -175,7 +190,9 @@ def classTk := leading_parser
|
|||
def «extends» := leading_parser
|
||||
" extends " >> sepBy1 termParser ", "
|
||||
def «structure» := leading_parser
|
||||
(structureTk <|> classTk) >> declId >>
|
||||
(structureTk <|> classTk) >>
|
||||
-- Note: no error recovery here due to clashing with the `class abbrev` syntax
|
||||
declId >>
|
||||
ppIndent (many (ppSpace >> Term.bracketedBinder) >> optional «extends» >> Term.optType) >>
|
||||
optional ((symbol " := " <|> " where ") >> optional structCtor >> structFields) >>
|
||||
optDeriving
|
||||
|
|
@ -184,7 +201,7 @@ def «structure» := leading_parser
|
|||
(«abbrev» <|> «def» <|> «theorem» <|> «opaque» <|> «instance» <|> «axiom» <|> «example» <|>
|
||||
«inductive» <|> classInductive <|> «structure»)
|
||||
@[builtin_command_parser] def «deriving» := leading_parser
|
||||
"deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 ident ", "
|
||||
"deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 (recover ident skip) ", "
|
||||
@[builtin_command_parser] def noncomputableSection := leading_parser
|
||||
"noncomputable " >> "section" >> optional (ppSpace >> ident)
|
||||
@[builtin_command_parser] def «section» := leading_parser
|
||||
|
|
|
|||
|
|
@ -452,7 +452,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil
|
|||
let p := andthenFn whitespace (categoryParserFnImpl catName)
|
||||
let ictx := mkInputContext input fileName
|
||||
let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input)
|
||||
if s.hasError then
|
||||
if !s.allErrors.isEmpty then
|
||||
Except.error (s.toErrorMsg ictx)
|
||||
else if input.atEnd s.pos then
|
||||
Except.ok s.stxStack.back
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@ structure ModuleParserState where
|
|||
recovering : Bool := false
|
||||
deriving Inhabited
|
||||
|
||||
private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Error) : Message := Id.run do
|
||||
let mut pos := s.pos
|
||||
private partial def mkErrorMessage (c : InputContext) (pos : String.Pos) (stk : SyntaxStack) (e : Parser.Error) : Message := Id.run do
|
||||
let mut pos := pos
|
||||
let mut endPos? := none
|
||||
let mut e := e
|
||||
unless e.unexpectedTk.isMissing do
|
||||
|
|
@ -53,7 +53,7 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro
|
|||
e := { e with unexpected }
|
||||
-- if there is an unexpected token, include preceding whitespace as well as the expected token could
|
||||
-- be inserted at any of these places to fix the error; see tests/lean/1971.lean
|
||||
if let .original (trailing := trailing) .. := s.stxStack.back.getTailInfo then
|
||||
if let some trailing := lastTrailing stk then
|
||||
if trailing.stopPos == pos then
|
||||
pos := trailing.startPos
|
||||
{ fileName := c.fileName
|
||||
|
|
@ -61,6 +61,12 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro
|
|||
endPos := c.fileMap.toPosition <$> endPos?
|
||||
keepFullRange := true
|
||||
data := toString e }
|
||||
where
|
||||
-- Error recovery might lead to there being some "junk" on the stack
|
||||
lastTrailing (s : SyntaxStack) : Option Substring :=
|
||||
s.toSubarray.findSomeRevM? (m := Id) fun stx =>
|
||||
if let .original (trailing := trailing) .. := stx.getTailInfo then pure (some trailing)
|
||||
else none
|
||||
|
||||
def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × MessageLog) := do
|
||||
let dummyEnv ← mkEmptyEnvironment
|
||||
|
|
@ -68,12 +74,10 @@ def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × M
|
|||
let tokens := Module.updateTokens (getTokenTable dummyEnv)
|
||||
let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.input)
|
||||
let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back
|
||||
match s.errorMsg with
|
||||
| some errorMsg =>
|
||||
let msg := mkErrorMessage inputCtx s errorMsg
|
||||
pure (stx, { pos := s.pos, recovering := true }, { : MessageLog }.add msg)
|
||||
| none =>
|
||||
pure (stx, { pos := s.pos }, {})
|
||||
let mut messages : MessageLog := {}
|
||||
for (pos, stk, err) in s.allErrors do
|
||||
messages := messages.add <| mkErrorMessage inputCtx pos stk err
|
||||
pure (stx, {pos := s.pos, recovering := s.hasError}, messages)
|
||||
|
||||
private def mkEOI (pos : String.Pos) : Syntax :=
|
||||
let atom := mkAtom (SourceInfo.original "".toSubstring pos "".toSubstring pos) ""
|
||||
|
|
@ -104,6 +108,9 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
|
|||
let pos' := pos
|
||||
let p := andthenFn whitespace topLevelCommandParserFn
|
||||
let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos }
|
||||
-- save errors from sub-recoveries
|
||||
for (rpos, rstk, recovered) in s.recoveredErrors do
|
||||
messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered
|
||||
pos := s.pos
|
||||
if recovering && !s.stxStack.isEmpty && s.stxStack.back.isAntiquot then
|
||||
-- top-level antiquotation during recovery is most likely remnant from unfinished quotation, ignore
|
||||
|
|
@ -122,7 +129,7 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
|
|||
We claim a syntactically incorrect command containing no token or identifier is irrelevant for intellisense and should be ignored. -/
|
||||
let ignore := s.stxStack.isEmpty || s.stxStack.back.getPos?.isNone
|
||||
unless recovering && ignore do
|
||||
messages := messages.add <| mkErrorMessage inputCtx s errorMsg
|
||||
messages := messages.add <| mkErrorMessage inputCtx s.pos s.stxStack errorMsg
|
||||
recovering := true
|
||||
if ignore then
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -199,6 +199,7 @@ structure ParserState where
|
|||
pos : String.Pos := 0
|
||||
cache : ParserCache
|
||||
errorMsg : Option Error := none
|
||||
recoveredErrors : Array (String.Pos × SyntaxStack × Error) := #[]
|
||||
|
||||
namespace ParserState
|
||||
|
||||
|
|
@ -233,16 +234,9 @@ def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState :=
|
|||
def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState :=
|
||||
{ s with pos := input.next' pos h }
|
||||
|
||||
def toErrorMsg (ctx : InputContext) (s : ParserState) : String :=
|
||||
match s.errorMsg with
|
||||
| none => ""
|
||||
| some msg =>
|
||||
let pos := ctx.fileMap.toPosition s.pos
|
||||
mkErrorStringWithPos ctx.fileName pos (toString msg)
|
||||
|
||||
def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, cache, err⟩ =>
|
||||
| ⟨stack, lhsPrec, pos, cache, err, recovered⟩ =>
|
||||
if err != none && stack.size == iniStackSz then
|
||||
-- If there is an error but there are no new nodes on the stack, use `missing` instead.
|
||||
-- Thus we ensure the property that an syntax tree contains (at least) one `missing` node
|
||||
|
|
@ -252,25 +246,28 @@ def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserSta
|
|||
-- `node k1 p1 <|> ... <|> node kn pn` when all parsers fail. With the code below we
|
||||
-- instead return a less misleading single `missing` node without randomly selecting any `ki`.
|
||||
let stack := stack.push Syntax.missing
|
||||
⟨stack, lhsPrec, pos, cache, err⟩
|
||||
⟨stack, lhsPrec, pos, cache, err, recovered⟩
|
||||
else
|
||||
let newNode := Syntax.node SourceInfo.none k (stack.extract iniStackSz stack.size)
|
||||
let stack := stack.shrink iniStackSz
|
||||
let stack := stack.push newNode
|
||||
⟨stack, lhsPrec, pos, cache, err⟩
|
||||
⟨stack, lhsPrec, pos, cache, err, recovered⟩
|
||||
|
||||
def mkTrailingNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, cache, err⟩ =>
|
||||
| ⟨stack, lhsPrec, pos, cache, err, errs⟩ =>
|
||||
let newNode := Syntax.node SourceInfo.none k (stack.extract (iniStackSz - 1) stack.size)
|
||||
let stack := stack.shrink (iniStackSz - 1)
|
||||
let stack := stack.push newNode
|
||||
⟨stack, lhsPrec, pos, cache, err⟩
|
||||
⟨stack, lhsPrec, pos, cache, err, errs⟩
|
||||
|
||||
def allErrors (s : ParserState) : Array (String.Pos × SyntaxStack × Error) :=
|
||||
s.recoveredErrors ++ (s.errorMsg.map (fun e => #[(s.pos, s.stxStack, e)])).getD #[]
|
||||
|
||||
@[inline]
|
||||
def setError (s : ParserState) (e : Error) : ParserState :=
|
||||
match s with
|
||||
| ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨stack, lhsPrec, pos, cache, some e⟩
|
||||
| ⟨stack, lhsPrec, pos, cache, _, errs⟩ => ⟨stack, lhsPrec, pos, cache, some e, errs⟩
|
||||
|
||||
def mkError (s : ParserState) (msg : String) : ParserState :=
|
||||
s.setError { expected := [msg] } |>.pushSyntax .missing
|
||||
|
|
@ -314,6 +311,14 @@ def mkUnexpectedTokenError (s : ParserState) (msg : String) (iniPos : String.Pos
|
|||
def mkUnexpectedErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState :=
|
||||
s.setPos pos |>.mkUnexpectedError msg
|
||||
|
||||
def toErrorMsg (ctx : InputContext) (s : ParserState) : String := Id.run do
|
||||
let mut errStr := ""
|
||||
for (pos, _stk, err) in s.allErrors do
|
||||
if errStr != "" then errStr := errStr ++ "\n"
|
||||
let pos := ctx.fileMap.toPosition pos
|
||||
errStr := errStr ++ mkErrorStringWithPos ctx.fileName pos (toString err)
|
||||
errStr
|
||||
|
||||
end ParserState
|
||||
|
||||
def ParserFn := ParserContext → ParserState → ParserState
|
||||
|
|
@ -416,7 +421,7 @@ def withCacheFn (parserName : Name) (p : ParserFn) : ParserFn := fun c s => Id.r
|
|||
if let some r := s.cache.parserCache.find? key then
|
||||
-- TODO: turn this into a proper trace once we have these in the parser
|
||||
--dbg_trace "parser cache hit: {parserName}:{s.pos} -> {r.stx}"
|
||||
return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg⟩
|
||||
return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg, s.recoveredErrors⟩
|
||||
let initStackSz := s.stxStack.raw.size
|
||||
let s := withStackDrop initStackSz p c { s with lhsPrec := 0, errorMsg := none }
|
||||
if s.stxStack.raw.size != initStackSz + 1 then
|
||||
|
|
|
|||
|
|
@ -194,6 +194,12 @@ def withMaybeTag (pos? : Option String.Pos) (x : FormatterM Unit) : Formatter :=
|
|||
-- them in turn. Uses the syntax traverser non-linearly!
|
||||
p1 <|> p2
|
||||
|
||||
@[combinator_formatter recover]
|
||||
def recover.formatter (fmt : PrettyPrinter.Formatter) := fmt
|
||||
|
||||
@[combinator_formatter recover']
|
||||
def recover'.formatter (fmt : PrettyPrinter.Formatter) := fmt
|
||||
|
||||
-- `mkAntiquot` is quite complex, so we'd rather have its formatter synthesized below the actual parser definition.
|
||||
-- Note that there is a mutual recursion
|
||||
-- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere
|
||||
|
|
|
|||
|
|
@ -267,6 +267,12 @@ def visitToken : Parenthesizer := do
|
|||
-- them in turn. Uses the syntax traverser non-linearly!
|
||||
p1 <|> p2
|
||||
|
||||
@[combinator_parenthesizer recover]
|
||||
def recover.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p
|
||||
|
||||
@[combinator_parenthesizer recover']
|
||||
def recover'.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p
|
||||
|
||||
-- `mkAntiquot` is quite complex, so we'd rather have its parenthesizer synthesized below the actual parser definition.
|
||||
-- Note that there is a mutual recursion
|
||||
-- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ options get_default_options() {
|
|||
// see https://lean-lang.org/lean4/doc/dev/bootstrap.html#further-bootstrapping-complications
|
||||
#if LEAN_IS_STAGE0 == 1
|
||||
// switch to `true` for ABI-breaking changes affecting meta code
|
||||
opts = opts.update({"interpreter", "prefer_native"}, false);
|
||||
opts = opts.update({"interpreter", "prefer_native"}, true);
|
||||
// switch to `true` for changing built-in parsers used in quotations
|
||||
opts = opts.update({"internal", "parseQuotWithCurrentStage"}, false);
|
||||
// toggling `parseQuotWithCurrentStage` may also require toggling the following option if macros/syntax
|
||||
|
|
|
|||
|
|
@ -2,5 +2,6 @@
|
|||
1971.lean:22:6-24:3: error: unexpected token 'def'; expected ':=', 'where' or '|'
|
||||
1971.lean:30:9-30:13: error: unexpected token 'def'; expected term
|
||||
1971.lean:34:9-36:7: error: unexpected token 'example'; expected identifier
|
||||
1971.lean:36:11-36:12: error: unexpected token; expected command
|
||||
1971.lean:39:17-41:7: error: unexpected token 'example'; expected 'false', 'true', numeral or string literal
|
||||
1971.lean:45:11: error: unterminated string literal
|
||||
|
|
|
|||
114
tests/lean/parserRecovery.lean
Normal file
114
tests/lean/parserRecovery.lean
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
import Lean
|
||||
|
||||
/-!
|
||||
# Parser error recovery
|
||||
|
||||
This test is to make sure that the error recovery feature is working as intended. In particular, it
|
||||
checks that certain errors in the head of a declaration (e.g. an invalid name) don't prevent further
|
||||
parse errors from being reported.
|
||||
-/
|
||||
|
||||
open Lean Parser
|
||||
|
||||
/-!
|
||||
## Error recovery for Lean commands
|
||||
-/
|
||||
def "foo" (x : Nat) : := 5
|
||||
|
||||
def 5 : Nat := 3
|
||||
|
||||
theorem yep : True := by trivial
|
||||
|
||||
theorem () : Nat := 99
|
||||
theorem 2.4 : Nat := )
|
||||
theorem "nat" : Nat := )
|
||||
theorem 3 : Nat := )
|
||||
|
||||
opaque ((( : Nat := )
|
||||
|
||||
axiom "nope" : 4 = ⟩
|
||||
|
||||
class inductive () where
|
||||
| "" : )
|
||||
|
||||
inductive 44: Type where | "" : 44
|
||||
|
||||
inductive 44 : Type where | "" : 44
|
||||
|
||||
inductive 44 (n : Nat) : Type where | "" : 44
|
||||
|
||||
inductive 44(n : Nat) : Type where | "" : 44
|
||||
|
||||
inductive 44| "" : 44
|
||||
|
||||
inductive !!! where
|
||||
| "" (x : Nat)) : Bogus
|
||||
|
||||
#eval Foo.a
|
||||
|
||||
inductive Item.{4, 5, 6, 7, a, "foo", b, c} : Nat where
|
||||
| foo : )
|
||||
| bar : )
|
||||
|
||||
example := ""
|
||||
|
||||
/-!
|
||||
## Error recovery for custom syntax
|
||||
|
||||
Adding recovery to Lean's term grammar doesn't work well with the primitive tools at hand
|
||||
(extensibility + token tables + backtracking are a real challenge), but for DSLs it can be useful.
|
||||
This tests that error recovery works for a term syntax extension.
|
||||
-/
|
||||
|
||||
def altParen : Parser :=
|
||||
"{|" >> termParser (prec := 51) >> recover "|}" skip
|
||||
|
||||
macro:50 e:altParen : term => pure ⟨e.raw[1]⟩
|
||||
|
||||
-- These terms show multiple errors due to recovery from missing |} tokens
|
||||
#eval ([ {|2 + 3] * {| 3 + |}
|
||||
|
||||
-- Only one error here
|
||||
#eval ([ (2 + 3] * ( 3 + ) )
|
||||
|
||||
example := ""
|
||||
|
||||
/-!
|
||||
## Error recovery for custom commands
|
||||
|
||||
This command can recover from many parse errors. Make sure that there's no arbitrary small limit by
|
||||
testing a few recoveries.
|
||||
-/
|
||||
|
||||
def nonws : Parser where
|
||||
fn :=
|
||||
andthenFn
|
||||
(andthenFn (satisfyFn (fun c => !c.isWhitespace && c != '#')) (takeWhileFn (!·.isWhitespace)))
|
||||
whitespace
|
||||
|
||||
def thing : Parser :=
|
||||
recover ident nonws
|
||||
|
||||
|
||||
open Lean Elab Command in
|
||||
elab "#go " xs:thing* (Lean.Parser.eoi <|> "#stop") : command => do
|
||||
let xs := toString xs
|
||||
elabCommand <| ← `(#eval s!"hey {$(quote xs)}")
|
||||
|
||||
-- Many errors due to recovery!
|
||||
#go
|
||||
hey
|
||||
5
|
||||
a
|
||||
g
|
||||
def
|
||||
5
|
||||
hey
|
||||
( 99
|
||||
(
|
||||
#stop
|
||||
|
||||
-- Even though the command had errors, this is still running as expected:
|
||||
|
||||
def x := 5
|
||||
#eval x
|
||||
46
tests/lean/parserRecovery.lean.expected.out
Normal file
46
tests/lean/parserRecovery.lean.expected.out
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
parserRecovery.lean:16:3-16:9: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:16:21-16:25: error: unexpected token ':='; expected term
|
||||
parserRecovery.lean:18:3-18:5: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:22:7-22:9: error: unexpected token '('; expected identifier
|
||||
parserRecovery.lean:22:9-22:10: error: unexpected token ')'; expected '_' or identifier
|
||||
parserRecovery.lean:23:7-23:11: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:23:20-23:22: error: unexpected token ')'; expected term
|
||||
parserRecovery.lean:24:7-24:13: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:24:22-24:24: error: unexpected token ')'; expected term
|
||||
parserRecovery.lean:25:7-25:9: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:25:18-25:20: error: unexpected token ')'; expected term
|
||||
parserRecovery.lean:27:6-27:8: error: unexpected token '('; expected identifier
|
||||
parserRecovery.lean:27:8-27:9: error: unexpected token '('; expected '_' or identifier
|
||||
parserRecovery.lean:29:5-29:12: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:29:18-29:20: error: unexpected token '⟩'; expected term
|
||||
parserRecovery.lean:31:15-31:17: error: unexpected token '('; expected identifier
|
||||
parserRecovery.lean:31:17-31:18: error: unexpected token ')'; expected '_' or identifier
|
||||
parserRecovery.lean:34:9-34:12: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:34:27: error: expected token
|
||||
parserRecovery.lean:36:9-36:12: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:36:28: error: expected token
|
||||
parserRecovery.lean:38:9-38:12: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:38:38: error: expected token
|
||||
parserRecovery.lean:40:9-40:12: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:40:37: error: expected token
|
||||
parserRecovery.lean:42:9-42:12: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:42:14: error: expected token
|
||||
parserRecovery.lean:44:9-44:11: error: unexpected token '!'; expected identifier
|
||||
parserRecovery.lean:45:4: error: expected token
|
||||
parserRecovery.lean:47:6-47:11: error: unknown identifier 'Foo.a'
|
||||
parserRecovery.lean:49:16-49:17: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:49:18-49:20: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:49:21-49:23: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:49:24-49:26: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:49:30-49:36: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:50:9-50:11: error: unexpected token ')'; expected term
|
||||
parserRecovery.lean:69:16-69:17: error: unexpected token ']'; expected '|}'
|
||||
parserRecovery.lean:69:20: error: unexpected token at this precedence level; consider parenthesizing the term
|
||||
parserRecovery.lean:72:15-72:16: error: unexpected token ']'; expected ')', ',' or ':'
|
||||
parserRecovery.lean:100:3-101:1: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:103:1-104:3: error: unexpected token 'def'; expected identifier
|
||||
parserRecovery.lean:105:0-105:1: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:106:3-107:1: error: unexpected token '('; expected identifier
|
||||
parserRecovery.lean:107:6-107:8: error: unexpected token; expected identifier
|
||||
parserRecovery.lean:108:2-108:3: error: unexpected token '('; expected identifier
|
||||
5
|
||||
Loading…
Add table
Reference in a new issue