feat: custom error recovery in parser (#3413)

Adds a simple error-recovery mechanism to Lean's parser, similar to those used in other combinator parsing libraries. Lean itself isn't very amenable to error recovery with this mechanism, as it requires global knowledge of the grammar in question to write recovery rules that don't break backtracking or `<|>`. I only found a few opportunities. But for DSLs, this is really important. In particular, Verso parse errors interacted very badly with Lean parse errors in a way that required frequent "restart file" commands, but this mechanism allows me to both recover from Verso parse errors and to have Lean skip the rest of the file rather than repeatedly trying to parse it as Lean commands.
2024-02-21 15:29:54 +01:00 · 2024-02-21 15:29:54 +01:00 · 74e7886ce7
commit 74e7886ce7
parent 0fb936158b
12 changed files with 304 additions and 45 deletions
--- a/src/Lean/Parser.lean
+++ b/src/Lean/Parser.lean
@ -47,6 +47,7 @@ builtin_initialize
  register_parser_alias (kind := interpolatedStrKind) interpolatedStr
  register_parser_alias orelse
  register_parser_alias andthen { stackSz? := none }
+  register_parser_alias recover

  registerAlias "notFollowedBy" ``notFollowedBy (notFollowedBy · "element")
  Parenthesizer.registerAlias "notFollowedBy" notFollowedBy.parenthesizer
--- a/src/Lean/Parser/Basic.lean
+++ b/src/Lean/Parser/Basic.lean
@ -203,8 +203,9 @@ def trailingNode (n : SyntaxNodeKind) (prec lhsPrec : Nat) (p : Parser) : Traili

 def mergeOrElseErrors (s : ParserState) (error1 : Error) (iniPos : String.Pos) (mergeErrors : Bool) : ParserState :=
  match s with
-  | ⟨stack, lhsPrec, pos, cache, some error2⟩ =>
-    if pos == iniPos then ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2)⟩
+  | ⟨stack, lhsPrec, pos, cache, some error2, errs⟩ =>
+    if pos == iniPos then
+      ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2), errs⟩
    else s
  | other => other

@ -285,7 +286,7 @@ instance : OrElse Parser where
 def atomicFn (p : ParserFn) : ParserFn := fun c s =>
  let iniPos := s.pos
  match p c s with
-  | ⟨stack, lhsPrec, _, cache, some msg⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg⟩
+  | ⟨stack, lhsPrec, _, cache, some msg, errs⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg, errs⟩
  | other                       => other

 /-- The `atomic(p)` parser parses `p`, returns the same result as `p` and fails iff `p` fails,
@ -296,6 +297,58 @@ This is important for the `p <|> q` combinator, because it is not backtracking,
 This parser has the same arity as `p` - it produces the same result as `p`. -/
 def atomic : Parser → Parser := withFn atomicFn

+/-- Information about the state of the parse prior to the failing parser's execution -/
+structure RecoveryContext where
+  /-- The position prior to the failing parser -/
+  initialPos : String.Pos
+  /-- The syntax stack height prior to the failing parser's execution -/
+  initialSize : Nat
+deriving BEq, DecidableEq, Repr
+
+/--
+Recover from errors in `p` using `recover` to consume input until a known-good state has appeared.
+If `recover` fails itself, then no recovery is performed.
+
+`recover` is provided with information about the failing parser's effects , and it is run in the
+state immediately after the failure. -/
+def recoverFn (p : ParserFn) (recover : RecoveryContext → ParserFn) : ParserFn := fun c s =>
+  let iniPos := s.pos
+  let iniSz := s.stxStack.size
+  let s' := p c s
+  if let some msg := s'.errorMsg then
+    let s' := recover ⟨iniPos, iniSz⟩ c {s' with errorMsg := none}
+    if s'.hasError then s'
+    else {s' with
+            pos := s'.pos,
+            lhsPrec := s'.lhsPrec,
+            cache := s'.cache,
+            errorMsg := none,
+            recoveredErrors := s'.recoveredErrors.push (s'.pos, s'.stxStack, msg) }
+  else s'
+
+/--
+Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared.
+If `handler` fails itself, then no recovery is performed.
+
+`handler` is provided with information about the failing parser's effects , and it is run in the
+state immediately after the failure.
+
+The interactions between <|> and `recover'` are subtle, especially for syntactic
+categories that admit user extension. Consider avoiding it in these cases. -/
+def recover' (parser : Parser) (handler : RecoveryContext → Parser) : Parser where
+  info := parser.info
+  fn := recoverFn parser.fn fun s => handler s |>.fn
+
+/--
+Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared.
+If `handler` fails itself, then no recovery is performed.
+
+`handler` is run in the state immediately after the failure.
+
+The interactions between <|> and `recover` are subtle, especially for syntactic
+categories that admit user extension. Consider avoiding it in these cases. -/
+def recover (parser handler : Parser) : Parser := recover' parser fun _ => handler
+
 def optionalFn (p : ParserFn) : ParserFn := fun c s =>
  let iniSz  := s.stackSize
  let iniPos := s.pos
@ -956,11 +1009,11 @@ private def tokenFnAux : ParserFn := fun c s =>
 private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState :=
  -- do not cache token parsing errors, which are rare and usually fatal and thus not worth an extra field in `TokenCache`
  match s with
-  | ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none⟩ =>
+  | ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none, errs⟩ =>
    if stack.size == 0 then s
    else
      let tk := stack.back
-      ⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none⟩
+      ⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none, errs⟩
  | other => other

 def tokenFn (expected : List String := []) : ParserFn := fun c s =>
@ -1259,21 +1312,24 @@ def keepTop (s : SyntaxStack) (startStackSize : Nat) : SyntaxStack :=

 def keepNewError (s : ParserState) (oldStackSize : Nat) : ParserState :=
  match s with
-  | ⟨stack, lhsPrec, pos, cache, err⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err⟩
+  | ⟨stack, lhsPrec, pos, cache, err, errs⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err, errs⟩

 def keepPrevError (s : ParserState) (oldStackSize : Nat) (oldStopPos : String.Pos) (oldError : Option Error) (oldLhsPrec : Nat) : ParserState :=
  match s with
-  | ⟨stack, _, _, cache, _⟩ => ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError⟩
+  | ⟨stack, _, _, cache, _, errs⟩ =>
+    ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError, errs⟩

 def mergeErrors (s : ParserState) (oldStackSize : Nat) (oldError : Error) : ParserState :=
  match s with
-  | ⟨stack, lhsPrec, pos, cache, some err⟩ =>
-    ⟨stack.shrink oldStackSize, lhsPrec, pos, cache, if oldError == err then some err else some (oldError.merge err)⟩
+  | ⟨stack, lhsPrec, pos, cache, some err, errs⟩ =>
+    let newError := if oldError == err then err else oldError.merge err
+    ⟨stack.shrink oldStackSize, lhsPrec, pos, cache, some newError, errs⟩
  | other                         => other

 def keepLatest (s : ParserState) (startStackSize : Nat) : ParserState :=
  match s with
-  | ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none⟩
+  | ⟨stack, lhsPrec, pos, cache, _, errs⟩ =>
+    ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none, errs⟩

 def replaceLongest (s : ParserState) (startStackSize : Nat) : ParserState :=
  s.keepLatest startStackSize
--- a/src/Lean/Parser/Command.lean
+++ b/src/Lean/Parser/Command.lean
@ -18,6 +18,21 @@ namespace Parser

 namespace Command

+/-- Skip input until the next character that satisfies the predicate, then skip whitespace -/
+private def skipUntil (pred : Char → Bool) : Parser where
+  fn :=
+    andthenFn
+      (takeUntilFn pred)
+      (takeWhileFn Char.isWhitespace)
+
+/-- Skip input until the next whitespace character (used for error recovery for certain tokens) -/
+private def skipUntilWs : Parser := skipUntil Char.isWhitespace
+
+/-- Skip input until the next whitespace character or delimiter (used for error recovery for certain
+    tokens, especially names that occur in signatures) -/
+private def skipUntilWsOrDelim : Parser := skipUntil fun c =>
+  c.isWhitespace || c == '(' || c == ')' || c == ':' || c == '{' || c == '}' || c == '|'
+
 /--
 Syntax quotation for (sequences of) commands.
 The identical syntax for term quotations takes priority,
@ -68,7 +83,7 @@ def declModifiers (inline : Bool) := leading_parser
  optional («partial» <|> «nonrec»)
 /-- `declId` matches `foo` or `foo.{u,v}`: an identifier possibly followed by a list of universe names -/
 def declId           := leading_parser
-  ident >> optional (".{" >> sepBy1 ident ", " >> "}")
+  ident >> optional (".{" >> sepBy1 (recover ident (skipUntil (fun c => c.isWhitespace || c ∈ [',', '}']))) ", " >> "}")
 /-- `declSig` matches the signature of a declaration with required type: a list of binders and then `: type` -/
 def declSig          := leading_parser
  many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.typeSpec
@ -100,18 +115,18 @@ def «abbrev»         := leading_parser
 def optDefDeriving   :=
  optional (ppDedent ppLine >> atomic ("deriving " >> notSymbol "instance") >> sepBy1 ident ", ")
 def «def»            := leading_parser
-  "def " >> declId >> ppIndent optDeclSig >> declVal >> optDefDeriving
+  "def " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> declVal >> optDefDeriving
 def «theorem»        := leading_parser
-  "theorem " >> declId >> ppIndent declSig >> declVal
+  "theorem " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> declVal
 def «opaque»         := leading_parser
-  "opaque " >> declId >> ppIndent declSig >> optional declValSimple
+  "opaque " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> optional declValSimple
 /- As `declSig` starts with a space, "instance" does not need a trailing space
  if we put `ppSpace` in the optional fragments. -/
 def «instance»       := leading_parser
  Term.attrKind >> "instance" >> optNamedPrio >>
  optional (ppSpace >> declId) >> ppIndent declSig >> declVal
 def «axiom»          := leading_parser
-  "axiom " >> declId >> ppIndent declSig
+  "axiom " >> recover declId skipUntilWsOrDelim >> ppIndent declSig
 /- As `declSig` starts with a space, "example" does not need a trailing space. -/
 def «example»        := leading_parser
  "example" >> ppIndent optDeclSig >> declVal
@ -144,11 +159,11 @@ or an element `head : α` followed by a list `tail : List α`.
 For more information about [inductive types](https://lean-lang.org/theorem_proving_in_lean4/inductive_types.html).
 -/
 def «inductive»      := leading_parser
-  "inductive " >> declId >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >>
+  "inductive " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >>
  many ctor >> optional (ppDedent ppLine >> computedFields) >> optDeriving
 def classInductive   := leading_parser
  atomic (group (symbol "class " >> "inductive ")) >>
-  declId >> ppIndent optDeclSig >>
+  recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >>
  optional (symbol " :=" <|> " where") >> many ctor >> optDeriving
 def structExplicitBinder := leading_parser
  atomic (declModifiers true >> "(") >>
@ -175,7 +190,9 @@ def classTk              := leading_parser
 def «extends»            := leading_parser
  " extends " >> sepBy1 termParser ", "
 def «structure»          := leading_parser
-    (structureTk <|> classTk) >> declId >>
+    (structureTk <|> classTk) >>
+    -- Note: no error recovery here due to clashing with the `class abbrev` syntax
+    declId >>
    ppIndent (many (ppSpace >> Term.bracketedBinder) >> optional «extends» >> Term.optType) >>
    optional ((symbol " := " <|> " where ") >> optional structCtor >> structFields) >>
    optDeriving
@ -184,7 +201,7 @@ def «structure»          := leading_parser
  («abbrev» <|> «def» <|> «theorem» <|> «opaque» <|> «instance» <|> «axiom» <|> «example» <|>
   «inductive» <|> classInductive <|> «structure»)
@[builtin_command_parser] def «deriving»     := leading_parser
-  "deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 ident ", "
+  "deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 (recover ident skip) ", "
@[builtin_command_parser] def noncomputableSection := leading_parser
  "noncomputable " >> "section" >> optional (ppSpace >> ident)
@[builtin_command_parser] def «section»      := leading_parser
--- a/src/Lean/Parser/Extension.lean
+++ b/src/Lean/Parser/Extension.lean
@ -452,7 +452,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil
  let p := andthenFn whitespace (categoryParserFnImpl catName)
  let ictx := mkInputContext input fileName
  let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input)
-  if s.hasError then
+  if !s.allErrors.isEmpty  then
    Except.error (s.toErrorMsg ictx)
  else if input.atEnd s.pos then
    Except.ok s.stxStack.back
--- a/src/Lean/Parser/Module.lean
+++ b/src/Lean/Parser/Module.lean
@ -37,8 +37,8 @@ structure ModuleParserState where
  recovering : Bool       := false
  deriving Inhabited

-private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Error) : Message := Id.run do
-  let mut pos := s.pos
+private partial def mkErrorMessage (c : InputContext) (pos : String.Pos) (stk : SyntaxStack) (e : Parser.Error) : Message := Id.run do
+  let mut pos := pos
  let mut endPos? := none
  let mut e := e
  unless e.unexpectedTk.isMissing do
@ -53,7 +53,7 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro
    e := { e with unexpected }
    -- if there is an unexpected token, include preceding whitespace as well as the expected token could
    -- be inserted at any of these places to fix the error; see tests/lean/1971.lean
-    if let .original (trailing := trailing) .. := s.stxStack.back.getTailInfo then
+    if let some trailing := lastTrailing stk then
      if trailing.stopPos == pos then
        pos := trailing.startPos
  { fileName := c.fileName
@ -61,6 +61,12 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro
    endPos := c.fileMap.toPosition <$> endPos?
    keepFullRange := true
    data := toString e }
+where
+  -- Error recovery might lead to there being some "junk" on the stack
+  lastTrailing (s : SyntaxStack) : Option Substring :=
+    s.toSubarray.findSomeRevM? (m := Id) fun stx =>
+      if let .original (trailing := trailing) .. := stx.getTailInfo then pure (some trailing)
+        else none

 def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × MessageLog) := do
  let dummyEnv ← mkEmptyEnvironment
@ -68,12 +74,10 @@ def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × M
  let tokens := Module.updateTokens (getTokenTable dummyEnv)
  let s   := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.input)
  let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back
-  match s.errorMsg with
-  | some errorMsg =>
-    let msg := mkErrorMessage inputCtx s errorMsg
-    pure (stx, { pos := s.pos, recovering := true }, { : MessageLog }.add msg)
-  | none =>
-    pure (stx, { pos := s.pos }, {})
+  let mut messages : MessageLog := {}
+  for (pos, stk, err) in s.allErrors do
+    messages := messages.add <| mkErrorMessage inputCtx pos stk err
+  pure (stx, {pos := s.pos, recovering := s.hasError}, messages)

 private def mkEOI (pos : String.Pos) : Syntax :=
  let atom := mkAtom (SourceInfo.original "".toSubstring pos "".toSubstring pos) ""
@ -104,6 +108,9 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
    let pos' := pos
    let p := andthenFn whitespace topLevelCommandParserFn
    let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos }
+    -- save errors from sub-recoveries
+    for (rpos, rstk, recovered) in s.recoveredErrors do
+      messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered
    pos := s.pos
    if recovering && !s.stxStack.isEmpty && s.stxStack.back.isAntiquot then
      -- top-level antiquotation during recovery is most likely remnant from unfinished quotation, ignore
@ -122,7 +129,7 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
          We claim a syntactically incorrect command containing no token or identifier is irrelevant for intellisense and should be ignored. -/
      let ignore := s.stxStack.isEmpty || s.stxStack.back.getPos?.isNone
      unless recovering && ignore do
-        messages := messages.add <| mkErrorMessage inputCtx s errorMsg
+        messages := messages.add <| mkErrorMessage inputCtx s.pos s.stxStack errorMsg
      recovering := true
      if ignore then
        continue
--- a/src/Lean/Parser/Types.lean
+++ b/src/Lean/Parser/Types.lean
@ -199,6 +199,7 @@ structure ParserState where
  pos      : String.Pos := 0
  cache    : ParserCache
  errorMsg : Option Error := none
+  recoveredErrors : Array (String.Pos × SyntaxStack × Error) := #[]

 namespace ParserState

@ -233,16 +234,9 @@ def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState :=
 def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState :=
  { s with pos := input.next' pos h }

-def toErrorMsg (ctx : InputContext) (s : ParserState) : String :=
-  match s.errorMsg with
-  | none     => ""
-  | some msg =>
-    let pos := ctx.fileMap.toPosition s.pos
-    mkErrorStringWithPos ctx.fileName pos (toString msg)
-
 def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
  match s with
-  | ⟨stack, lhsPrec, pos, cache, err⟩ =>
+  | ⟨stack, lhsPrec, pos, cache, err, recovered⟩ =>
    if err != none && stack.size == iniStackSz then
      -- If there is an error but there are no new nodes on the stack, use `missing` instead.
      -- Thus we ensure the property that an syntax tree contains (at least) one `missing` node
@ -252,25 +246,28 @@ def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserSta
      -- `node k1 p1 <|> ... <|> node kn pn` when all parsers fail. With the code below we
      -- instead return a less misleading single `missing` node without randomly selecting any `ki`.
      let stack   := stack.push Syntax.missing
-      ⟨stack, lhsPrec, pos, cache, err⟩
+      ⟨stack, lhsPrec, pos, cache, err, recovered⟩
    else
      let newNode := Syntax.node SourceInfo.none k (stack.extract iniStackSz stack.size)
      let stack   := stack.shrink iniStackSz
      let stack   := stack.push newNode
-      ⟨stack, lhsPrec, pos, cache, err⟩
+      ⟨stack, lhsPrec, pos, cache, err, recovered⟩

 def mkTrailingNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
  match s with
-  | ⟨stack, lhsPrec, pos, cache, err⟩ =>
+  | ⟨stack, lhsPrec, pos, cache, err, errs⟩ =>
    let newNode := Syntax.node SourceInfo.none k (stack.extract (iniStackSz - 1) stack.size)
    let stack   := stack.shrink (iniStackSz - 1)
    let stack   := stack.push newNode
-    ⟨stack, lhsPrec, pos, cache, err⟩
+    ⟨stack, lhsPrec, pos, cache, err, errs⟩
+
+def allErrors (s : ParserState) : Array (String.Pos × SyntaxStack × Error) :=
+  s.recoveredErrors ++ (s.errorMsg.map (fun e => #[(s.pos, s.stxStack, e)])).getD #[]

@[inline]
 def setError (s : ParserState) (e : Error) : ParserState :=
  match s with
-  | ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨stack, lhsPrec, pos, cache, some e⟩
+  | ⟨stack, lhsPrec, pos, cache, _, errs⟩ => ⟨stack, lhsPrec, pos, cache, some e, errs⟩

 def mkError (s : ParserState) (msg : String) : ParserState :=
  s.setError { expected := [msg] } |>.pushSyntax .missing
@ -314,6 +311,14 @@ def mkUnexpectedTokenError (s : ParserState) (msg : String) (iniPos : String.Pos
 def mkUnexpectedErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState :=
  s.setPos pos |>.mkUnexpectedError msg

+def toErrorMsg (ctx : InputContext) (s : ParserState) : String := Id.run do
+  let mut errStr := ""
+  for (pos, _stk, err) in s.allErrors do
+    if errStr != "" then errStr := errStr ++ "\n"
+    let pos := ctx.fileMap.toPosition pos
+    errStr := errStr ++ mkErrorStringWithPos ctx.fileName pos (toString err)
+  errStr
+
 end ParserState

 def ParserFn := ParserContext → ParserState → ParserState
@ -416,7 +421,7 @@ def withCacheFn (parserName : Name) (p : ParserFn) : ParserFn := fun c s => Id.r
  if let some r := s.cache.parserCache.find? key then
    -- TODO: turn this into a proper trace once we have these in the parser
    --dbg_trace "parser cache hit: {parserName}:{s.pos} -> {r.stx}"
-    return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg⟩
+    return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg, s.recoveredErrors⟩
  let initStackSz := s.stxStack.raw.size
  let s := withStackDrop initStackSz p c { s with lhsPrec := 0, errorMsg := none }
  if s.stxStack.raw.size != initStackSz + 1 then
--- a/src/Lean/PrettyPrinter/Formatter.lean
+++ b/src/Lean/PrettyPrinter/Formatter.lean
@ -194,6 +194,12 @@ def withMaybeTag (pos? : Option String.Pos) (x : FormatterM Unit) : Formatter :=
    -- them in turn. Uses the syntax traverser non-linearly!
    p1 <|> p2

+@[combinator_formatter recover]
+def recover.formatter (fmt : PrettyPrinter.Formatter) := fmt
+
+@[combinator_formatter recover']
+def recover'.formatter (fmt : PrettyPrinter.Formatter) := fmt
+
 -- `mkAntiquot` is quite complex, so we'd rather have its formatter synthesized below the actual parser definition.
 -- Note that there is a mutual recursion
 -- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere
--- a/src/Lean/PrettyPrinter/Parenthesizer.lean
+++ b/src/Lean/PrettyPrinter/Parenthesizer.lean
@ -267,6 +267,12 @@ def visitToken : Parenthesizer := do
    -- them in turn. Uses the syntax traverser non-linearly!
    p1 <|> p2

+@[combinator_parenthesizer recover]
+def recover.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p
+
+@[combinator_parenthesizer recover']
+def recover'.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p
+
 -- `mkAntiquot` is quite complex, so we'd rather have its parenthesizer synthesized below the actual parser definition.
 -- Note that there is a mutual recursion
 -- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere
--- a/stage0/src/stdlib_flags.h
+++ b/stage0/src/stdlib_flags.h
@ -6,7 +6,7 @@ options get_default_options() {
    // see https://lean-lang.org/lean4/doc/dev/bootstrap.html#further-bootstrapping-complications
 #if LEAN_IS_STAGE0 == 1
    // switch to `true` for ABI-breaking changes affecting meta code
-    opts = opts.update({"interpreter", "prefer_native"}, false);
+    opts = opts.update({"interpreter", "prefer_native"}, true);
    // switch to `true` for changing built-in parsers used in quotations
    opts = opts.update({"internal", "parseQuotWithCurrentStage"}, false);
    // toggling `parseQuotWithCurrentStage` may also require toggling the following option if macros/syntax
--- a/tests/lean/1971.lean.expected.out
+++ b/tests/lean/1971.lean.expected.out
@ -2,5 +2,6 @@
 1971.lean:22:6-24:3: error: unexpected token 'def'; expected ':=', 'where' or '|'
 1971.lean:30:9-30:13: error: unexpected token 'def'; expected term
 1971.lean:34:9-36:7: error: unexpected token 'example'; expected identifier
+1971.lean:36:11-36:12: error: unexpected token; expected command
 1971.lean:39:17-41:7: error: unexpected token 'example'; expected 'false', 'true', numeral or string literal
 1971.lean:45:11: error: unterminated string literal
--- a/tests/lean/parserRecovery.lean
+++ b/tests/lean/parserRecovery.lean
@ -0,0 +1,114 @@
+import Lean
+
+/-!
+# Parser error recovery
+
+This test is to make sure that the error recovery feature is working as intended. In particular, it
+checks that certain errors in the head of a declaration (e.g. an invalid name) don't prevent further
+parse errors from being reported.
+-/
+
+open Lean Parser
+
+/-!
+## Error recovery for Lean commands
+-/
+def "foo" (x : Nat) :  := 5
+
+def 5 : Nat := 3
+
+theorem yep : True := by trivial
+
+theorem () : Nat := 99
+theorem 2.4 : Nat := )
+theorem "nat" : Nat := )
+theorem 3 : Nat := )
+
+opaque ((( : Nat := )
+
+axiom "nope" : 4 = ⟩
+
+class inductive () where
+  | "" : )
+
+inductive 44: Type where | "" : 44
+
+inductive 44 : Type where | "" : 44
+
+inductive 44 (n : Nat) : Type where | "" : 44
+
+inductive 44(n : Nat) : Type where | "" : 44
+
+inductive 44| "" : 44
+
+inductive !!! where
+  | "" (x : Nat)) : Bogus
+
+#eval Foo.a
+
+inductive Item.{4, 5, 6, 7, a, "foo", b, c} : Nat where
+  | foo : )
+  | bar : )
+
+example := ""
+
+/-!
+## Error recovery for custom syntax
+
+Adding recovery to Lean's term grammar doesn't work well with the primitive tools at hand
+(extensibility + token tables + backtracking are a real challenge), but for DSLs it can be useful.
+This tests that error recovery works for a term syntax extension.
+-/
+
+def altParen : Parser :=
+  "{|" >> termParser (prec := 51) >> recover "|}" skip
+
+macro:50 e:altParen : term => pure ⟨e.raw[1]⟩
+
+-- These terms show multiple errors due to recovery from missing |} tokens
+#eval ([ {|2 + 3] * {| 3 + |}
+
+-- Only one error here
+#eval ([ (2 + 3] * ( 3 + ) )
+
+example := ""
+
+/-!
+## Error recovery for custom commands
+
+This command can recover from many parse errors. Make sure that there's no arbitrary small limit by
+testing a few recoveries.
+-/
+
+def nonws : Parser where
+  fn :=
+    andthenFn
+      (andthenFn (satisfyFn (fun c => !c.isWhitespace && c != '#')) (takeWhileFn (!·.isWhitespace)))
+      whitespace
+
+def thing : Parser :=
+  recover ident nonws
+
+
+open Lean Elab Command in
+elab "#go " xs:thing* (Lean.Parser.eoi <|> "#stop") : command => do
+  let xs := toString xs
+  elabCommand <| ← `(#eval s!"hey {$(quote xs)}")
+
+-- Many errors due to recovery!
+#go
+hey
+5
+a
+g
+def
+5
+hey
+(     99
+  (
+#stop
+
+-- Even though the command had errors, this is still running as expected:
+
+def x := 5
+#eval x
--- a/tests/lean/parserRecovery.lean.expected.out
+++ b/tests/lean/parserRecovery.lean.expected.out
@ -0,0 +1,46 @@
+parserRecovery.lean:16:3-16:9: error: unexpected token; expected identifier
+parserRecovery.lean:16:21-16:25: error: unexpected token ':='; expected term
+parserRecovery.lean:18:3-18:5: error: unexpected token; expected identifier
+parserRecovery.lean:22:7-22:9: error: unexpected token '('; expected identifier
+parserRecovery.lean:22:9-22:10: error: unexpected token ')'; expected '_' or identifier
+parserRecovery.lean:23:7-23:11: error: unexpected token; expected identifier
+parserRecovery.lean:23:20-23:22: error: unexpected token ')'; expected term
+parserRecovery.lean:24:7-24:13: error: unexpected token; expected identifier
+parserRecovery.lean:24:22-24:24: error: unexpected token ')'; expected term
+parserRecovery.lean:25:7-25:9: error: unexpected token; expected identifier
+parserRecovery.lean:25:18-25:20: error: unexpected token ')'; expected term
+parserRecovery.lean:27:6-27:8: error: unexpected token '('; expected identifier
+parserRecovery.lean:27:8-27:9: error: unexpected token '('; expected '_' or identifier
+parserRecovery.lean:29:5-29:12: error: unexpected token; expected identifier
+parserRecovery.lean:29:18-29:20: error: unexpected token '⟩'; expected term
+parserRecovery.lean:31:15-31:17: error: unexpected token '('; expected identifier
+parserRecovery.lean:31:17-31:18: error: unexpected token ')'; expected '_' or identifier
+parserRecovery.lean:34:9-34:12: error: unexpected token; expected identifier
+parserRecovery.lean:34:27: error: expected token
+parserRecovery.lean:36:9-36:12: error: unexpected token; expected identifier
+parserRecovery.lean:36:28: error: expected token
+parserRecovery.lean:38:9-38:12: error: unexpected token; expected identifier
+parserRecovery.lean:38:38: error: expected token
+parserRecovery.lean:40:9-40:12: error: unexpected token; expected identifier
+parserRecovery.lean:40:37: error: expected token
+parserRecovery.lean:42:9-42:12: error: unexpected token; expected identifier
+parserRecovery.lean:42:14: error: expected token
+parserRecovery.lean:44:9-44:11: error: unexpected token '!'; expected identifier
+parserRecovery.lean:45:4: error: expected token
+parserRecovery.lean:47:6-47:11: error: unknown identifier 'Foo.a'
+parserRecovery.lean:49:16-49:17: error: unexpected token; expected identifier
+parserRecovery.lean:49:18-49:20: error: unexpected token; expected identifier
+parserRecovery.lean:49:21-49:23: error: unexpected token; expected identifier
+parserRecovery.lean:49:24-49:26: error: unexpected token; expected identifier
+parserRecovery.lean:49:30-49:36: error: unexpected token; expected identifier
+parserRecovery.lean:50:9-50:11: error: unexpected token ')'; expected term
+parserRecovery.lean:69:16-69:17: error: unexpected token ']'; expected '|}'
+parserRecovery.lean:69:20: error: unexpected token at this precedence level; consider parenthesizing the term
+parserRecovery.lean:72:15-72:16: error: unexpected token ']'; expected ')', ',' or ':'
+parserRecovery.lean:100:3-101:1: error: unexpected token; expected identifier
+parserRecovery.lean:103:1-104:3: error: unexpected token 'def'; expected identifier
+parserRecovery.lean:105:0-105:1: error: unexpected token; expected identifier
+parserRecovery.lean:106:3-107:1: error: unexpected token '('; expected identifier
+parserRecovery.lean:107:6-107:8: error: unexpected token; expected identifier
+parserRecovery.lean:108:2-108:3: error: unexpected token '('; expected identifier
+5