From 74e7886ce779d99b0ae33a75314d7b41219a2fe9 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 21 Feb 2024 15:29:54 +0100 Subject: [PATCH] feat: custom error recovery in parser (#3413) Adds a simple error-recovery mechanism to Lean's parser, similar to those used in other combinator parsing libraries. Lean itself isn't very amenable to error recovery with this mechanism, as it requires global knowledge of the grammar in question to write recovery rules that don't break backtracking or `<|>`. I only found a few opportunities. But for DSLs, this is really important. In particular, Verso parse errors interacted very badly with Lean parse errors in a way that required frequent "restart file" commands, but this mechanism allows me to both recover from Verso parse errors and to have Lean skip the rest of the file rather than repeatedly trying to parse it as Lean commands. --- src/Lean/Parser.lean | 1 + src/Lean/Parser/Basic.lean | 76 +++++++++++-- src/Lean/Parser/Command.lean | 35 ++++-- src/Lean/Parser/Extension.lean | 2 +- src/Lean/Parser/Module.lean | 27 +++-- src/Lean/Parser/Types.lean | 33 +++--- src/Lean/PrettyPrinter/Formatter.lean | 6 ++ src/Lean/PrettyPrinter/Parenthesizer.lean | 6 ++ stage0/src/stdlib_flags.h | 2 +- tests/lean/1971.lean.expected.out | 1 + tests/lean/parserRecovery.lean | 114 ++++++++++++++++++++ tests/lean/parserRecovery.lean.expected.out | 46 ++++++++ 12 files changed, 304 insertions(+), 45 deletions(-) create mode 100644 tests/lean/parserRecovery.lean create mode 100644 tests/lean/parserRecovery.lean.expected.out diff --git a/src/Lean/Parser.lean b/src/Lean/Parser.lean index fe2e6cfb07..e978450ef7 100644 --- a/src/Lean/Parser.lean +++ b/src/Lean/Parser.lean @@ -47,6 +47,7 @@ builtin_initialize register_parser_alias (kind := interpolatedStrKind) interpolatedStr register_parser_alias orelse register_parser_alias andthen { stackSz? := none } + register_parser_alias recover registerAlias "notFollowedBy" ``notFollowedBy (notFollowedBy · "element") Parenthesizer.registerAlias "notFollowedBy" notFollowedBy.parenthesizer diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 3cd2da1706..7ba45f62a6 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -203,8 +203,9 @@ def trailingNode (n : SyntaxNodeKind) (prec lhsPrec : Nat) (p : Parser) : Traili def mergeOrElseErrors (s : ParserState) (error1 : Error) (iniPos : String.Pos) (mergeErrors : Bool) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, some error2⟩ => - if pos == iniPos then ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2)⟩ + | ⟨stack, lhsPrec, pos, cache, some error2, errs⟩ => + if pos == iniPos then + ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2), errs⟩ else s | other => other @@ -285,7 +286,7 @@ instance : OrElse Parser where def atomicFn (p : ParserFn) : ParserFn := fun c s => let iniPos := s.pos match p c s with - | ⟨stack, lhsPrec, _, cache, some msg⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg⟩ + | ⟨stack, lhsPrec, _, cache, some msg, errs⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg, errs⟩ | other => other /-- The `atomic(p)` parser parses `p`, returns the same result as `p` and fails iff `p` fails, @@ -296,6 +297,58 @@ This is important for the `p <|> q` combinator, because it is not backtracking, This parser has the same arity as `p` - it produces the same result as `p`. -/ def atomic : Parser → Parser := withFn atomicFn +/-- Information about the state of the parse prior to the failing parser's execution -/ +structure RecoveryContext where + /-- The position prior to the failing parser -/ + initialPos : String.Pos + /-- The syntax stack height prior to the failing parser's execution -/ + initialSize : Nat +deriving BEq, DecidableEq, Repr + +/-- +Recover from errors in `p` using `recover` to consume input until a known-good state has appeared. +If `recover` fails itself, then no recovery is performed. + +`recover` is provided with information about the failing parser's effects , and it is run in the +state immediately after the failure. -/ +def recoverFn (p : ParserFn) (recover : RecoveryContext → ParserFn) : ParserFn := fun c s => + let iniPos := s.pos + let iniSz := s.stxStack.size + let s' := p c s + if let some msg := s'.errorMsg then + let s' := recover ⟨iniPos, iniSz⟩ c {s' with errorMsg := none} + if s'.hasError then s' + else {s' with + pos := s'.pos, + lhsPrec := s'.lhsPrec, + cache := s'.cache, + errorMsg := none, + recoveredErrors := s'.recoveredErrors.push (s'.pos, s'.stxStack, msg) } + else s' + +/-- +Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared. +If `handler` fails itself, then no recovery is performed. + +`handler` is provided with information about the failing parser's effects , and it is run in the +state immediately after the failure. + +The interactions between <|> and `recover'` are subtle, especially for syntactic +categories that admit user extension. Consider avoiding it in these cases. -/ +def recover' (parser : Parser) (handler : RecoveryContext → Parser) : Parser where + info := parser.info + fn := recoverFn parser.fn fun s => handler s |>.fn + +/-- +Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared. +If `handler` fails itself, then no recovery is performed. + +`handler` is run in the state immediately after the failure. + +The interactions between <|> and `recover` are subtle, especially for syntactic +categories that admit user extension. Consider avoiding it in these cases. -/ +def recover (parser handler : Parser) : Parser := recover' parser fun _ => handler + def optionalFn (p : ParserFn) : ParserFn := fun c s => let iniSz := s.stackSize let iniPos := s.pos @@ -956,11 +1009,11 @@ private def tokenFnAux : ParserFn := fun c s => private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState := -- do not cache token parsing errors, which are rare and usually fatal and thus not worth an extra field in `TokenCache` match s with - | ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none⟩ => + | ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none, errs⟩ => if stack.size == 0 then s else let tk := stack.back - ⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none⟩ + ⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none, errs⟩ | other => other def tokenFn (expected : List String := []) : ParserFn := fun c s => @@ -1259,21 +1312,24 @@ def keepTop (s : SyntaxStack) (startStackSize : Nat) : SyntaxStack := def keepNewError (s : ParserState) (oldStackSize : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, err⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err⟩ + | ⟨stack, lhsPrec, pos, cache, err, errs⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err, errs⟩ def keepPrevError (s : ParserState) (oldStackSize : Nat) (oldStopPos : String.Pos) (oldError : Option Error) (oldLhsPrec : Nat) : ParserState := match s with - | ⟨stack, _, _, cache, _⟩ => ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError⟩ + | ⟨stack, _, _, cache, _, errs⟩ => + ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError, errs⟩ def mergeErrors (s : ParserState) (oldStackSize : Nat) (oldError : Error) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, some err⟩ => - ⟨stack.shrink oldStackSize, lhsPrec, pos, cache, if oldError == err then some err else some (oldError.merge err)⟩ + | ⟨stack, lhsPrec, pos, cache, some err, errs⟩ => + let newError := if oldError == err then err else oldError.merge err + ⟨stack.shrink oldStackSize, lhsPrec, pos, cache, some newError, errs⟩ | other => other def keepLatest (s : ParserState) (startStackSize : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none⟩ + | ⟨stack, lhsPrec, pos, cache, _, errs⟩ => + ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none, errs⟩ def replaceLongest (s : ParserState) (startStackSize : Nat) : ParserState := s.keepLatest startStackSize diff --git a/src/Lean/Parser/Command.lean b/src/Lean/Parser/Command.lean index 40f246f96f..6c87e15057 100644 --- a/src/Lean/Parser/Command.lean +++ b/src/Lean/Parser/Command.lean @@ -18,6 +18,21 @@ namespace Parser namespace Command +/-- Skip input until the next character that satisfies the predicate, then skip whitespace -/ +private def skipUntil (pred : Char → Bool) : Parser where + fn := + andthenFn + (takeUntilFn pred) + (takeWhileFn Char.isWhitespace) + +/-- Skip input until the next whitespace character (used for error recovery for certain tokens) -/ +private def skipUntilWs : Parser := skipUntil Char.isWhitespace + +/-- Skip input until the next whitespace character or delimiter (used for error recovery for certain + tokens, especially names that occur in signatures) -/ +private def skipUntilWsOrDelim : Parser := skipUntil fun c => + c.isWhitespace || c == '(' || c == ')' || c == ':' || c == '{' || c == '}' || c == '|' + /-- Syntax quotation for (sequences of) commands. The identical syntax for term quotations takes priority, @@ -68,7 +83,7 @@ def declModifiers (inline : Bool) := leading_parser optional («partial» <|> «nonrec») /-- `declId` matches `foo` or `foo.{u,v}`: an identifier possibly followed by a list of universe names -/ def declId := leading_parser - ident >> optional (".{" >> sepBy1 ident ", " >> "}") + ident >> optional (".{" >> sepBy1 (recover ident (skipUntil (fun c => c.isWhitespace || c ∈ [',', '}']))) ", " >> "}") /-- `declSig` matches the signature of a declaration with required type: a list of binders and then `: type` -/ def declSig := leading_parser many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.typeSpec @@ -100,18 +115,18 @@ def «abbrev» := leading_parser def optDefDeriving := optional (ppDedent ppLine >> atomic ("deriving " >> notSymbol "instance") >> sepBy1 ident ", ") def «def» := leading_parser - "def " >> declId >> ppIndent optDeclSig >> declVal >> optDefDeriving + "def " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> declVal >> optDefDeriving def «theorem» := leading_parser - "theorem " >> declId >> ppIndent declSig >> declVal + "theorem " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> declVal def «opaque» := leading_parser - "opaque " >> declId >> ppIndent declSig >> optional declValSimple + "opaque " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> optional declValSimple /- As `declSig` starts with a space, "instance" does not need a trailing space if we put `ppSpace` in the optional fragments. -/ def «instance» := leading_parser Term.attrKind >> "instance" >> optNamedPrio >> optional (ppSpace >> declId) >> ppIndent declSig >> declVal def «axiom» := leading_parser - "axiom " >> declId >> ppIndent declSig + "axiom " >> recover declId skipUntilWsOrDelim >> ppIndent declSig /- As `declSig` starts with a space, "example" does not need a trailing space. -/ def «example» := leading_parser "example" >> ppIndent optDeclSig >> declVal @@ -144,11 +159,11 @@ or an element `head : α` followed by a list `tail : List α`. For more information about [inductive types](https://lean-lang.org/theorem_proving_in_lean4/inductive_types.html). -/ def «inductive» := leading_parser - "inductive " >> declId >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >> + "inductive " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >> many ctor >> optional (ppDedent ppLine >> computedFields) >> optDeriving def classInductive := leading_parser atomic (group (symbol "class " >> "inductive ")) >> - declId >> ppIndent optDeclSig >> + recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >> many ctor >> optDeriving def structExplicitBinder := leading_parser atomic (declModifiers true >> "(") >> @@ -175,7 +190,9 @@ def classTk := leading_parser def «extends» := leading_parser " extends " >> sepBy1 termParser ", " def «structure» := leading_parser - (structureTk <|> classTk) >> declId >> + (structureTk <|> classTk) >> + -- Note: no error recovery here due to clashing with the `class abbrev` syntax + declId >> ppIndent (many (ppSpace >> Term.bracketedBinder) >> optional «extends» >> Term.optType) >> optional ((symbol " := " <|> " where ") >> optional structCtor >> structFields) >> optDeriving @@ -184,7 +201,7 @@ def «structure» := leading_parser («abbrev» <|> «def» <|> «theorem» <|> «opaque» <|> «instance» <|> «axiom» <|> «example» <|> «inductive» <|> classInductive <|> «structure») @[builtin_command_parser] def «deriving» := leading_parser - "deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 ident ", " + "deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 (recover ident skip) ", " @[builtin_command_parser] def noncomputableSection := leading_parser "noncomputable " >> "section" >> optional (ppSpace >> ident) @[builtin_command_parser] def «section» := leading_parser diff --git a/src/Lean/Parser/Extension.lean b/src/Lean/Parser/Extension.lean index c0994b3162..db2d2e47c9 100644 --- a/src/Lean/Parser/Extension.lean +++ b/src/Lean/Parser/Extension.lean @@ -452,7 +452,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil let p := andthenFn whitespace (categoryParserFnImpl catName) let ictx := mkInputContext input fileName let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input) - if s.hasError then + if !s.allErrors.isEmpty then Except.error (s.toErrorMsg ictx) else if input.atEnd s.pos then Except.ok s.stxStack.back diff --git a/src/Lean/Parser/Module.lean b/src/Lean/Parser/Module.lean index b2f9491b45..abf036725f 100644 --- a/src/Lean/Parser/Module.lean +++ b/src/Lean/Parser/Module.lean @@ -37,8 +37,8 @@ structure ModuleParserState where recovering : Bool := false deriving Inhabited -private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Error) : Message := Id.run do - let mut pos := s.pos +private partial def mkErrorMessage (c : InputContext) (pos : String.Pos) (stk : SyntaxStack) (e : Parser.Error) : Message := Id.run do + let mut pos := pos let mut endPos? := none let mut e := e unless e.unexpectedTk.isMissing do @@ -53,7 +53,7 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro e := { e with unexpected } -- if there is an unexpected token, include preceding whitespace as well as the expected token could -- be inserted at any of these places to fix the error; see tests/lean/1971.lean - if let .original (trailing := trailing) .. := s.stxStack.back.getTailInfo then + if let some trailing := lastTrailing stk then if trailing.stopPos == pos then pos := trailing.startPos { fileName := c.fileName @@ -61,6 +61,12 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro endPos := c.fileMap.toPosition <$> endPos? keepFullRange := true data := toString e } +where + -- Error recovery might lead to there being some "junk" on the stack + lastTrailing (s : SyntaxStack) : Option Substring := + s.toSubarray.findSomeRevM? (m := Id) fun stx => + if let .original (trailing := trailing) .. := stx.getTailInfo then pure (some trailing) + else none def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × MessageLog) := do let dummyEnv ← mkEmptyEnvironment @@ -68,12 +74,10 @@ def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × M let tokens := Module.updateTokens (getTokenTable dummyEnv) let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.input) let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back - match s.errorMsg with - | some errorMsg => - let msg := mkErrorMessage inputCtx s errorMsg - pure (stx, { pos := s.pos, recovering := true }, { : MessageLog }.add msg) - | none => - pure (stx, { pos := s.pos }, {}) + let mut messages : MessageLog := {} + for (pos, stk, err) in s.allErrors do + messages := messages.add <| mkErrorMessage inputCtx pos stk err + pure (stx, {pos := s.pos, recovering := s.hasError}, messages) private def mkEOI (pos : String.Pos) : Syntax := let atom := mkAtom (SourceInfo.original "".toSubstring pos "".toSubstring pos) "" @@ -104,6 +108,9 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext) let pos' := pos let p := andthenFn whitespace topLevelCommandParserFn let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos } + -- save errors from sub-recoveries + for (rpos, rstk, recovered) in s.recoveredErrors do + messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered pos := s.pos if recovering && !s.stxStack.isEmpty && s.stxStack.back.isAntiquot then -- top-level antiquotation during recovery is most likely remnant from unfinished quotation, ignore @@ -122,7 +129,7 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext) We claim a syntactically incorrect command containing no token or identifier is irrelevant for intellisense and should be ignored. -/ let ignore := s.stxStack.isEmpty || s.stxStack.back.getPos?.isNone unless recovering && ignore do - messages := messages.add <| mkErrorMessage inputCtx s errorMsg + messages := messages.add <| mkErrorMessage inputCtx s.pos s.stxStack errorMsg recovering := true if ignore then continue diff --git a/src/Lean/Parser/Types.lean b/src/Lean/Parser/Types.lean index 635f85ff97..30f4a31663 100644 --- a/src/Lean/Parser/Types.lean +++ b/src/Lean/Parser/Types.lean @@ -199,6 +199,7 @@ structure ParserState where pos : String.Pos := 0 cache : ParserCache errorMsg : Option Error := none + recoveredErrors : Array (String.Pos × SyntaxStack × Error) := #[] namespace ParserState @@ -233,16 +234,9 @@ def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState := def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState := { s with pos := input.next' pos h } -def toErrorMsg (ctx : InputContext) (s : ParserState) : String := - match s.errorMsg with - | none => "" - | some msg => - let pos := ctx.fileMap.toPosition s.pos - mkErrorStringWithPos ctx.fileName pos (toString msg) - def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, err⟩ => + | ⟨stack, lhsPrec, pos, cache, err, recovered⟩ => if err != none && stack.size == iniStackSz then -- If there is an error but there are no new nodes on the stack, use `missing` instead. -- Thus we ensure the property that an syntax tree contains (at least) one `missing` node @@ -252,25 +246,28 @@ def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserSta -- `node k1 p1 <|> ... <|> node kn pn` when all parsers fail. With the code below we -- instead return a less misleading single `missing` node without randomly selecting any `ki`. let stack := stack.push Syntax.missing - ⟨stack, lhsPrec, pos, cache, err⟩ + ⟨stack, lhsPrec, pos, cache, err, recovered⟩ else let newNode := Syntax.node SourceInfo.none k (stack.extract iniStackSz stack.size) let stack := stack.shrink iniStackSz let stack := stack.push newNode - ⟨stack, lhsPrec, pos, cache, err⟩ + ⟨stack, lhsPrec, pos, cache, err, recovered⟩ def mkTrailingNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, err⟩ => + | ⟨stack, lhsPrec, pos, cache, err, errs⟩ => let newNode := Syntax.node SourceInfo.none k (stack.extract (iniStackSz - 1) stack.size) let stack := stack.shrink (iniStackSz - 1) let stack := stack.push newNode - ⟨stack, lhsPrec, pos, cache, err⟩ + ⟨stack, lhsPrec, pos, cache, err, errs⟩ + +def allErrors (s : ParserState) : Array (String.Pos × SyntaxStack × Error) := + s.recoveredErrors ++ (s.errorMsg.map (fun e => #[(s.pos, s.stxStack, e)])).getD #[] @[inline] def setError (s : ParserState) (e : Error) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨stack, lhsPrec, pos, cache, some e⟩ + | ⟨stack, lhsPrec, pos, cache, _, errs⟩ => ⟨stack, lhsPrec, pos, cache, some e, errs⟩ def mkError (s : ParserState) (msg : String) : ParserState := s.setError { expected := [msg] } |>.pushSyntax .missing @@ -314,6 +311,14 @@ def mkUnexpectedTokenError (s : ParserState) (msg : String) (iniPos : String.Pos def mkUnexpectedErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState := s.setPos pos |>.mkUnexpectedError msg +def toErrorMsg (ctx : InputContext) (s : ParserState) : String := Id.run do + let mut errStr := "" + for (pos, _stk, err) in s.allErrors do + if errStr != "" then errStr := errStr ++ "\n" + let pos := ctx.fileMap.toPosition pos + errStr := errStr ++ mkErrorStringWithPos ctx.fileName pos (toString err) + errStr + end ParserState def ParserFn := ParserContext → ParserState → ParserState @@ -416,7 +421,7 @@ def withCacheFn (parserName : Name) (p : ParserFn) : ParserFn := fun c s => Id.r if let some r := s.cache.parserCache.find? key then -- TODO: turn this into a proper trace once we have these in the parser --dbg_trace "parser cache hit: {parserName}:{s.pos} -> {r.stx}" - return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg⟩ + return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg, s.recoveredErrors⟩ let initStackSz := s.stxStack.raw.size let s := withStackDrop initStackSz p c { s with lhsPrec := 0, errorMsg := none } if s.stxStack.raw.size != initStackSz + 1 then diff --git a/src/Lean/PrettyPrinter/Formatter.lean b/src/Lean/PrettyPrinter/Formatter.lean index cc490a2f55..7566236190 100644 --- a/src/Lean/PrettyPrinter/Formatter.lean +++ b/src/Lean/PrettyPrinter/Formatter.lean @@ -194,6 +194,12 @@ def withMaybeTag (pos? : Option String.Pos) (x : FormatterM Unit) : Formatter := -- them in turn. Uses the syntax traverser non-linearly! p1 <|> p2 +@[combinator_formatter recover] +def recover.formatter (fmt : PrettyPrinter.Formatter) := fmt + +@[combinator_formatter recover'] +def recover'.formatter (fmt : PrettyPrinter.Formatter) := fmt + -- `mkAntiquot` is quite complex, so we'd rather have its formatter synthesized below the actual parser definition. -- Note that there is a mutual recursion -- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere diff --git a/src/Lean/PrettyPrinter/Parenthesizer.lean b/src/Lean/PrettyPrinter/Parenthesizer.lean index 6ea0658a39..f2509edd85 100644 --- a/src/Lean/PrettyPrinter/Parenthesizer.lean +++ b/src/Lean/PrettyPrinter/Parenthesizer.lean @@ -267,6 +267,12 @@ def visitToken : Parenthesizer := do -- them in turn. Uses the syntax traverser non-linearly! p1 <|> p2 +@[combinator_parenthesizer recover] +def recover.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p + +@[combinator_parenthesizer recover'] +def recover'.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p + -- `mkAntiquot` is quite complex, so we'd rather have its parenthesizer synthesized below the actual parser definition. -- Note that there is a mutual recursion -- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere diff --git a/stage0/src/stdlib_flags.h b/stage0/src/stdlib_flags.h index 0699845ba4..cb8a954a4d 100644 --- a/stage0/src/stdlib_flags.h +++ b/stage0/src/stdlib_flags.h @@ -6,7 +6,7 @@ options get_default_options() { // see https://lean-lang.org/lean4/doc/dev/bootstrap.html#further-bootstrapping-complications #if LEAN_IS_STAGE0 == 1 // switch to `true` for ABI-breaking changes affecting meta code - opts = opts.update({"interpreter", "prefer_native"}, false); + opts = opts.update({"interpreter", "prefer_native"}, true); // switch to `true` for changing built-in parsers used in quotations opts = opts.update({"internal", "parseQuotWithCurrentStage"}, false); // toggling `parseQuotWithCurrentStage` may also require toggling the following option if macros/syntax diff --git a/tests/lean/1971.lean.expected.out b/tests/lean/1971.lean.expected.out index c383f7f5e6..8b0c405839 100644 --- a/tests/lean/1971.lean.expected.out +++ b/tests/lean/1971.lean.expected.out @@ -2,5 +2,6 @@ 1971.lean:22:6-24:3: error: unexpected token 'def'; expected ':=', 'where' or '|' 1971.lean:30:9-30:13: error: unexpected token 'def'; expected term 1971.lean:34:9-36:7: error: unexpected token 'example'; expected identifier +1971.lean:36:11-36:12: error: unexpected token; expected command 1971.lean:39:17-41:7: error: unexpected token 'example'; expected 'false', 'true', numeral or string literal 1971.lean:45:11: error: unterminated string literal diff --git a/tests/lean/parserRecovery.lean b/tests/lean/parserRecovery.lean new file mode 100644 index 0000000000..08be3555ee --- /dev/null +++ b/tests/lean/parserRecovery.lean @@ -0,0 +1,114 @@ +import Lean + +/-! +# Parser error recovery + +This test is to make sure that the error recovery feature is working as intended. In particular, it +checks that certain errors in the head of a declaration (e.g. an invalid name) don't prevent further +parse errors from being reported. +-/ + +open Lean Parser + +/-! +## Error recovery for Lean commands +-/ +def "foo" (x : Nat) : := 5 + +def 5 : Nat := 3 + +theorem yep : True := by trivial + +theorem () : Nat := 99 +theorem 2.4 : Nat := ) +theorem "nat" : Nat := ) +theorem 3 : Nat := ) + +opaque ((( : Nat := ) + +axiom "nope" : 4 = ⟩ + +class inductive () where + | "" : ) + +inductive 44: Type where | "" : 44 + +inductive 44 : Type where | "" : 44 + +inductive 44 (n : Nat) : Type where | "" : 44 + +inductive 44(n : Nat) : Type where | "" : 44 + +inductive 44| "" : 44 + +inductive !!! where + | "" (x : Nat)) : Bogus + +#eval Foo.a + +inductive Item.{4, 5, 6, 7, a, "foo", b, c} : Nat where + | foo : ) + | bar : ) + +example := "" + +/-! +## Error recovery for custom syntax + +Adding recovery to Lean's term grammar doesn't work well with the primitive tools at hand +(extensibility + token tables + backtracking are a real challenge), but for DSLs it can be useful. +This tests that error recovery works for a term syntax extension. +-/ + +def altParen : Parser := + "{|" >> termParser (prec := 51) >> recover "|}" skip + +macro:50 e:altParen : term => pure ⟨e.raw[1]⟩ + +-- These terms show multiple errors due to recovery from missing |} tokens +#eval ([ {|2 + 3] * {| 3 + |} + +-- Only one error here +#eval ([ (2 + 3] * ( 3 + ) ) + +example := "" + +/-! +## Error recovery for custom commands + +This command can recover from many parse errors. Make sure that there's no arbitrary small limit by +testing a few recoveries. +-/ + +def nonws : Parser where + fn := + andthenFn + (andthenFn (satisfyFn (fun c => !c.isWhitespace && c != '#')) (takeWhileFn (!·.isWhitespace))) + whitespace + +def thing : Parser := + recover ident nonws + + +open Lean Elab Command in +elab "#go " xs:thing* (Lean.Parser.eoi <|> "#stop") : command => do + let xs := toString xs + elabCommand <| ← `(#eval s!"hey {$(quote xs)}") + +-- Many errors due to recovery! +#go +hey +5 +a +g +def +5 +hey +( 99 + ( +#stop + +-- Even though the command had errors, this is still running as expected: + +def x := 5 +#eval x diff --git a/tests/lean/parserRecovery.lean.expected.out b/tests/lean/parserRecovery.lean.expected.out new file mode 100644 index 0000000000..fa6cbd293d --- /dev/null +++ b/tests/lean/parserRecovery.lean.expected.out @@ -0,0 +1,46 @@ +parserRecovery.lean:16:3-16:9: error: unexpected token; expected identifier +parserRecovery.lean:16:21-16:25: error: unexpected token ':='; expected term +parserRecovery.lean:18:3-18:5: error: unexpected token; expected identifier +parserRecovery.lean:22:7-22:9: error: unexpected token '('; expected identifier +parserRecovery.lean:22:9-22:10: error: unexpected token ')'; expected '_' or identifier +parserRecovery.lean:23:7-23:11: error: unexpected token; expected identifier +parserRecovery.lean:23:20-23:22: error: unexpected token ')'; expected term +parserRecovery.lean:24:7-24:13: error: unexpected token; expected identifier +parserRecovery.lean:24:22-24:24: error: unexpected token ')'; expected term +parserRecovery.lean:25:7-25:9: error: unexpected token; expected identifier +parserRecovery.lean:25:18-25:20: error: unexpected token ')'; expected term +parserRecovery.lean:27:6-27:8: error: unexpected token '('; expected identifier +parserRecovery.lean:27:8-27:9: error: unexpected token '('; expected '_' or identifier +parserRecovery.lean:29:5-29:12: error: unexpected token; expected identifier +parserRecovery.lean:29:18-29:20: error: unexpected token '⟩'; expected term +parserRecovery.lean:31:15-31:17: error: unexpected token '('; expected identifier +parserRecovery.lean:31:17-31:18: error: unexpected token ')'; expected '_' or identifier +parserRecovery.lean:34:9-34:12: error: unexpected token; expected identifier +parserRecovery.lean:34:27: error: expected token +parserRecovery.lean:36:9-36:12: error: unexpected token; expected identifier +parserRecovery.lean:36:28: error: expected token +parserRecovery.lean:38:9-38:12: error: unexpected token; expected identifier +parserRecovery.lean:38:38: error: expected token +parserRecovery.lean:40:9-40:12: error: unexpected token; expected identifier +parserRecovery.lean:40:37: error: expected token +parserRecovery.lean:42:9-42:12: error: unexpected token; expected identifier +parserRecovery.lean:42:14: error: expected token +parserRecovery.lean:44:9-44:11: error: unexpected token '!'; expected identifier +parserRecovery.lean:45:4: error: expected token +parserRecovery.lean:47:6-47:11: error: unknown identifier 'Foo.a' +parserRecovery.lean:49:16-49:17: error: unexpected token; expected identifier +parserRecovery.lean:49:18-49:20: error: unexpected token; expected identifier +parserRecovery.lean:49:21-49:23: error: unexpected token; expected identifier +parserRecovery.lean:49:24-49:26: error: unexpected token; expected identifier +parserRecovery.lean:49:30-49:36: error: unexpected token; expected identifier +parserRecovery.lean:50:9-50:11: error: unexpected token ')'; expected term +parserRecovery.lean:69:16-69:17: error: unexpected token ']'; expected '|}' +parserRecovery.lean:69:20: error: unexpected token at this precedence level; consider parenthesizing the term +parserRecovery.lean:72:15-72:16: error: unexpected token ']'; expected ')', ',' or ':' +parserRecovery.lean:100:3-101:1: error: unexpected token; expected identifier +parserRecovery.lean:103:1-104:3: error: unexpected token 'def'; expected identifier +parserRecovery.lean:105:0-105:1: error: unexpected token; expected identifier +parserRecovery.lean:106:3-107:1: error: unexpected token '('; expected identifier +parserRecovery.lean:107:6-107:8: error: unexpected token; expected identifier +parserRecovery.lean:108:2-108:3: error: unexpected token '('; expected identifier +5