diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 0c295b677e..1cab4c7e0e 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -983,7 +983,7 @@ private def nameLitAux (startPos : Nat) : ParserFn := fun c s => let input := c.input let s := identFnAux startPos none Name.anonymous c (s.next input startPos) if s.hasError then - s.mkErrorAt "invalid Name literal" startPos + s else let stx := s.stxStack.back match stx with @@ -1009,6 +1009,7 @@ private def tokenFnAux : ParserFn := fun c s => identFnAux i tk Name.anonymous c s private def updateCache (startPos : Nat) (s : ParserState) : ParserState := + -- do not cache token parsing errors, which are rare and usually fatal and thus not worth an extra field in `TokenCache` match s with | ⟨stack, pos, cache, none⟩ => if stack.size == 0 then s @@ -1030,19 +1031,19 @@ def tokenFn : ParserFn := fun c s => let s := tokenFnAux c s updateCache i s -def peekTokenAux (c : ParserContext) (s : ParserState) : ParserState × Option Syntax := +def peekTokenAux (c : ParserContext) (s : ParserState) : ParserState × Except ParserState Syntax := let iniSz := s.stackSize let iniPos := s.pos let s := tokenFn c s - if s.hasError then (s.restore iniSz iniPos, none) + if let some e := s.errorMsg then (s.restore iniSz iniPos, Except.error s) else let stx := s.stxStack.back - (s.restore iniSz iniPos, some stx) + (s.restore iniSz iniPos, Except.ok stx) -def peekToken (c : ParserContext) (s : ParserState) : ParserState × Option Syntax := +def peekToken (c : ParserContext) (s : ParserState) : ParserState × Except ParserState Syntax := let tkc := s.cache.tokenCache if tkc.startPos == s.pos then - (s, some tkc.token) + (s, Except.ok tkc.token) else peekTokenAux c s @@ -1057,7 +1058,7 @@ def rawIdentFn : ParserFn := fun c s => let startPos := s.pos let s := tokenFn c s if s.hasError then - s.mkErrorsAt expected startPos + s else match s.stxStack.back with | Syntax.atom _ sym => if p sym then s else s.mkErrorsAt expected startPos @@ -1094,7 +1095,7 @@ def checkTailNoWs (prev : Syntax) : Bool := def nonReservedSymbolFnAux (sym : String) (errorMsg : String) : ParserFn := fun c s => let startPos := s.pos let s := tokenFn c s - if s.hasError then s.mkErrorAt errorMsg startPos + if s.hasError then s else match s.stxStack.back with | Syntax.atom _ sym' => @@ -1185,7 +1186,7 @@ def numLitFn : ParserFn := fun c s => let iniPos := s.pos let s := tokenFn c s - if s.hasError || !(s.stxStack.back.isOfKind numLitKind) then s.mkErrorAt "numeral" iniPos else s + if !s.hasError && !(s.stxStack.back.isOfKind numLitKind) then s.mkErrorAt "numeral" iniPos else s @[inline] def numLitNoAntiquot : Parser := { fn := numLitFn, @@ -1196,7 +1197,7 @@ def scientificLitFn : ParserFn := fun c s => let iniPos := s.pos let s := tokenFn c s - if s.hasError || !(s.stxStack.back.isOfKind scientificLitKind) then s.mkErrorAt "scientific number" iniPos else s + if !s.hasError && !(s.stxStack.back.isOfKind scientificLitKind) then s.mkErrorAt "scientific number" iniPos else s @[inline] def scientificLitNoAntiquot : Parser := { fn := scientificLitFn, @@ -1206,7 +1207,7 @@ def scientificLitFn : ParserFn := def strLitFn : ParserFn := fun c s => let iniPos := s.pos let s := tokenFn c s - if s.hasError || !(s.stxStack.back.isOfKind strLitKind) then s.mkErrorAt "string literal" iniPos else s + if !s.hasError && !(s.stxStack.back.isOfKind strLitKind) then s.mkErrorAt "string literal" iniPos else s @[inline] def strLitNoAntiquot : Parser := { fn := strLitFn, @@ -1216,7 +1217,7 @@ def strLitFn : ParserFn := fun c s => def charLitFn : ParserFn := fun c s => let iniPos := s.pos let s := tokenFn c s - if s.hasError || !(s.stxStack.back.isOfKind charLitKind) then s.mkErrorAt "character literal" iniPos else s + if !s.hasError && !(s.stxStack.back.isOfKind charLitKind) then s.mkErrorAt "character literal" iniPos else s @[inline] def charLitNoAntiquot : Parser := { fn := charLitFn, @@ -1226,7 +1227,7 @@ def charLitFn : ParserFn := fun c s => def nameLitFn : ParserFn := fun c s => let iniPos := s.pos let s := tokenFn c s - if s.hasError || !(s.stxStack.back.isOfKind nameLitKind) then s.mkErrorAt "Name literal" iniPos else s + if !s.hasError && !(s.stxStack.back.isOfKind nameLitKind) then s.mkErrorAt "Name literal" iniPos else s @[inline] def nameLitNoAntiquot : Parser := { fn := nameLitFn, @@ -1236,7 +1237,7 @@ def nameLitFn : ParserFn := fun c s => def identFn : ParserFn := fun c s => let iniPos := s.pos let s := tokenFn c s - if s.hasError || !(s.stxStack.back.isIdent) then s.mkErrorAt "identifier" iniPos else s + if !s.hasError && !(s.stxStack.back.isIdent) then s.mkErrorAt "identifier" iniPos else s @[inline] def identNoAntiquot : Parser := { fn := identFn, @@ -1251,7 +1252,7 @@ def identEqFn (id : Name) : ParserFn := fun c s => let iniPos := s.pos let s := tokenFn c s if s.hasError then - s.mkErrorAt "identifier" iniPos + s else match s.stxStack.back with | Syntax.ident _ _ val _ => if val != id then s.mkErrorAt ("expected identifier '" ++ toString id ++ "'") iniPos else s | _ => s.mkErrorAt "identifier" iniPos @@ -1535,8 +1536,8 @@ def indexed {α : Type} (map : TokenMap α) (c : ParserContext) (s : ParserState | some as => (s, as) | _ => (s, []) match stx with - | some (Syntax.atom _ sym) => find (Name.mkSimple sym) - | some (Syntax.ident _ _ val _) => + | Except.ok (Syntax.atom _ sym) => find (Name.mkSimple sym) + | Except.ok (Syntax.ident _ _ val _) => match behavior with | LeadingIdentBehavior.default => find identKind | LeadingIdentBehavior.symbol => @@ -1549,8 +1550,9 @@ def indexed {α : Type} (map : TokenMap α) (c : ParserContext) (s : ParserState | some as' => (s, as ++ as') | _ => (s, as) | none => find identKind - | some (Syntax.node k _) => find k - | _ => (s, []) + | Except.ok (Syntax.node k _) => find k + | Except.ok _ => (s, []) + | Except.error s' => (s', []) abbrev CategoryParserFn := Name → ParserFn @@ -1650,10 +1652,10 @@ def mkAntiquot (name : String) (kind : Option SyntaxNodeKind) (anonymous := true nameP def tryAnti (c : ParserContext) (s : ParserState) : Bool := - let (s, stx?) := peekToken c s - match stx? with - | some stx@(Syntax.atom _ sym) => sym == "$" - | _ => false + let (s, stx) := peekToken c s + match stx with + | Except.ok stx@(Syntax.atom _ sym) => sym == "$" + | _ => false @[inline] def withAntiquotFn (antiquotP p : ParserFn) : ParserFn := fun c s => if tryAnti c s then orelseFn antiquotP p c s else p c s @@ -1771,15 +1773,16 @@ private def mkResult (s : ParserState) (iniSz : Nat) : ParserState := if s.stackSize == iniSz + 1 then s else s.mkNode nullKind iniSz -- throw error instead? -def leadingParserAux (kind : Name) (tables : PrattParsingTables) (behavior : LeadingIdentBehavior) : ParserFn := fun c s => +def leadingParserAux (kind : Name) (tables : PrattParsingTables) (behavior : LeadingIdentBehavior) : ParserFn := fun c s => do let iniSz := s.stackSize let (s, ps) := indexed tables.leadingTable c s behavior + if s.hasError then + return s let ps := tables.leadingParsers ++ ps if ps.isEmpty then - s.mkError (toString kind) - else - let s := longestMatchFn none ps c s - mkResult s iniSz + return s.mkError (toString kind) + let s := longestMatchFn none ps c s + mkResult s iniSz @[inline] def leadingParser (kind : Name) (tables : PrattParsingTables) (behavior : LeadingIdentBehavior) (antiquotParser : ParserFn) : ParserFn := withAntiquotFn antiquotParser (leadingParserAux kind tables behavior) @@ -1795,20 +1798,25 @@ private def mkTrailingResult (s : ParserState) (iniSz : Nat) : ParserState := let s := s.popSyntax.popSyntax s.pushSyntax result -partial def trailingLoop (tables : PrattParsingTables) (c : ParserContext) (s : ParserState) : ParserState := +partial def trailingLoop (tables : PrattParsingTables) (c : ParserContext) (s : ParserState) : ParserState := do + let iniSz := s.stackSize + let iniPos := s.pos let (s, ps) := indexed tables.trailingTable c s LeadingIdentBehavior.default + if s.hasError then + -- Discard token parse errors and break the trailing loop instead. + -- The error will be flagged when the next leading position is parsed, unless the token + -- is in fact valid there (e.g. EOI at command level, no-longer forbidden token) + return s.restore iniSz iniPos if ps.isEmpty && tables.trailingParsers.isEmpty then - s -- no available trailing parser - else - let left := s.stxStack.back - let iniSz := s.stackSize - let iniPos := s.pos - let s := trailingLoopStep tables left ps c s - if s.hasError then - if s.pos == iniPos then s.restore iniSz iniPos else s - else - let s := mkTrailingResult s iniSz - trailingLoop tables c s + return s -- no available trailing parser + let left := s.stxStack.back + let s := trailingLoopStep tables left ps c s + if s.hasError then + -- Discard non-consuming parse errors and break the trailing loop instead. + -- This is necessary for fallback parsers like `app` that pretend to be always applicable. + return if s.pos == iniPos then s.restore iniSz iniPos else s + let s := mkTrailingResult s iniSz + trailingLoop tables c s /-- diff --git a/src/Lean/Parser/Extension.lean b/src/Lean/Parser/Extension.lean index bbee80ca75..3e1e0ee52d 100644 --- a/src/Lean/Parser/Extension.lean +++ b/src/Lean/Parser/Extension.lean @@ -535,12 +535,13 @@ def notFollowedByCategoryTokenFn (catName : Name) : ParserFn := fun ctx s => | some cat => let (s, stx) := peekToken ctx s match stx with - | some (Syntax.atom _ sym) => + | Except.ok (Syntax.atom _ sym) => if ctx.insideQuot && sym == "$" then s else match cat.tables.leadingTable.find? (Name.mkSimple sym) with | some _ => s.mkUnexpectedError (toString catName) | _ => s - | _ => s + | Except.ok _ => s + | Except.error _ => s @[inline] def notFollowedByCategoryToken (catName : Name) : Parser := { fn := notFollowedByCategoryTokenFn catName diff --git a/tests/lean/tokenErrors.lean b/tests/lean/tokenErrors.lean new file mode 100644 index 0000000000..ffef8930bd --- /dev/null +++ b/tests/lean/tokenErrors.lean @@ -0,0 +1,14 @@ +#check 'hi +example : Nat := 0 -- recover + +#check '\y' +example : Nat := 0 -- recover + +/- +-- these are all "fatal" +#check "hi +#check hi.« +-/ + +#print Nat +/- diff --git a/tests/lean/tokenErrors.lean.expected.out b/tests/lean/tokenErrors.lean.expected.out new file mode 100644 index 0000000000..a06f2f7709 --- /dev/null +++ b/tests/lean/tokenErrors.lean.expected.out @@ -0,0 +1,3 @@ +tokenErrors.lean:1:10: error: missing end of character literal +tokenErrors.lean:4:9: error: invalid escape sequence +tokenErrors.lean:15:0: error: end of input