diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 80fa360fb4..283f2cf2c6 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -870,16 +870,14 @@ def rawIdentFn : ParserFn := fun c s => if input.atEnd i then s.mkEOIError else identFnAux i none .anonymous c s -def satisfySymbolFn (p : String → Bool) (expected : List String) : ParserFn := fun c s => - let initStackSz := s.stackSize - let startPos := s.pos - let s := tokenFn expected c s +def satisfySymbolFn (p : String → Bool) (expected : List String) : ParserFn := fun c s => Id.run do + let s := tokenFn expected c s if s.hasError then - s - else - match s.stxStack.back with - | .atom _ sym => if p sym then s else s.mkErrorsAt expected startPos initStackSz - | _ => s.mkErrorsAt expected startPos initStackSz + return s + if let .atom _ sym := s.stxStack.back then + if p sym then + return s + s.mkUnexpectedTokenErrors expected def symbolFnAux (sym : String) (errorMsg : String) : ParserFn := satisfySymbolFn (fun s => s == sym) [errorMsg] @@ -909,22 +907,20 @@ def checkTailNoWs (prev : Syntax) : Bool := For example, the universe `max` Function is parsed using this combinator so that it can still be used as an identifier outside of universe (but registering it as a token in a Term Syntax would not break the universe Parser). -/ -def nonReservedSymbolFnAux (sym : String) (errorMsg : String) : ParserFn := fun c s => - let initStackSz := s.stackSize - let startPos := s.pos +def nonReservedSymbolFnAux (sym : String) (errorMsg : String) : ParserFn := fun c s => Id.run do let s := tokenFn [errorMsg] c s - if s.hasError then s - else - match s.stxStack.back with - | .atom _ sym' => - if sym == sym' then s else s.mkErrorAt errorMsg startPos initStackSz - | .ident info rawVal _ _ => - if sym == rawVal.toString then - let s := s.popSyntax - s.pushSyntax (Syntax.atom info sym) - else - s.mkErrorAt errorMsg startPos initStackSz - | _ => s.mkErrorAt errorMsg startPos initStackSz + if s.hasError then + return s + match s.stxStack.back with + | .atom _ sym' => + if sym == sym' then + return s + | .ident info rawVal _ _ => + if sym == rawVal.toString then + let s := s.popSyntax + return s.pushSyntax (Syntax.atom info sym) + | _ => () + s.mkUnexpectedTokenError errorMsg def nonReservedSymbolFn (sym : String) : ParserFn := nonReservedSymbolFnAux sym ("'" ++ sym ++ "'") @@ -1028,66 +1024,49 @@ def unicodeSymbolNoAntiquot (sym asciiSym : String) : Parser := def mkAtomicInfo (k : String) : ParserInfo := { firstTokens := FirstTokens.tokens [ k ] } -def numLitFn : ParserFn := fun c s => - let initStackSz := s.stackSize - let iniPos := s.pos - let s := tokenFn ["numeral"] c s - if !s.hasError && !(s.stxStack.back.isOfKind numLitKind) then s.mkErrorAt "numeral" iniPos initStackSz else s +/-- + Parses a token and asserts the result is of the given kind. + `desc` is used in error messages as the token kind. -/ +def expectTokenFn (k : SyntaxNodeKind) (desc : String) : ParserFn := fun c s => + let s := tokenFn [desc] c s + if !s.hasError && !(s.stxStack.back.isOfKind k) then s.mkUnexpectedTokenError desc else s + +def numLitFn : ParserFn := expectTokenFn numLitKind "numeral" def numLitNoAntiquot : Parser := { fn := numLitFn info := mkAtomicInfo "num" } -def scientificLitFn : ParserFn := fun c s => - let initStackSz := s.stackSize - let iniPos := s.pos - let s := tokenFn ["scientific number"] c s - if !s.hasError && !(s.stxStack.back.isOfKind scientificLitKind) then s.mkErrorAt "scientific number" iniPos initStackSz else s +def scientificLitFn : ParserFn := expectTokenFn scientificLitKind "scientific number" def scientificLitNoAntiquot : Parser := { fn := scientificLitFn info := mkAtomicInfo "scientific" } -def strLitFn : ParserFn := fun c s => - let initStackSz := s.stackSize - let iniPos := s.pos - let s := tokenFn ["string literal"] c s - if !s.hasError && !(s.stxStack.back.isOfKind strLitKind) then s.mkErrorAt "string literal" iniPos initStackSz else s +def strLitFn : ParserFn := expectTokenFn strLitKind "string literal" def strLitNoAntiquot : Parser := { fn := strLitFn info := mkAtomicInfo "str" } -def charLitFn : ParserFn := fun c s => - let initStackSz := s.stackSize - let iniPos := s.pos - let s := tokenFn ["char literal"] c s - if !s.hasError && !(s.stxStack.back.isOfKind charLitKind) then s.mkErrorAt "character literal" iniPos initStackSz else s +def charLitFn : ParserFn := expectTokenFn charLitKind "character literal" def charLitNoAntiquot : Parser := { fn := charLitFn info := mkAtomicInfo "char" } -def nameLitFn : ParserFn := fun c s => - let initStackSz := s.stackSize - let iniPos := s.pos - let s := tokenFn ["Name literal"] c s - if !s.hasError && !(s.stxStack.back.isOfKind nameLitKind) then s.mkErrorAt "Name literal" iniPos initStackSz else s +def nameLitFn : ParserFn := expectTokenFn nameLitKind "Name literal" def nameLitNoAntiquot : Parser := { fn := nameLitFn info := mkAtomicInfo "name" } -def identFn : ParserFn := fun c s => - let initStackSz := s.stackSize - let iniPos := s.pos - let s := tokenFn ["identifier"] c s - if !s.hasError && !(s.stxStack.back.isIdent) then s.mkErrorAt "identifier" iniPos initStackSz else s +def identFn : ParserFn := expectTokenFn identKind "identifier" def identNoAntiquot : Parser := { fn := identFn @@ -1099,14 +1078,12 @@ def rawIdentNoAntiquot : Parser := { } def identEqFn (id : Name) : ParserFn := fun c s => - let initStackSz := s.stackSize - let iniPos := s.pos let s := tokenFn ["identifier"] c s if s.hasError then s else match s.stxStack.back with - | .ident _ _ val _ => if val != id then s.mkErrorAt ("expected identifier '" ++ toString id ++ "'") iniPos initStackSz else s - | _ => s.mkErrorAt "identifier" iniPos initStackSz + | .ident _ _ val _ => if val != id then s.mkUnexpectedTokenError s!"identifier '{id}'" else s + | _ => s.mkUnexpectedTokenError "identifier" def identEq (id : Name) : Parser := { fn := identEqFn id @@ -1688,7 +1665,11 @@ def leadingParserAux (kind : Name) (tables : PrattParsingTables) (behavior : Lea return s let ps := tables.leadingParsers ++ ps if ps.isEmpty then - return s.mkError (toString kind) + -- if there are no applicable parsers, consume the leading token and flag it as unexpected at this position + let s := tokenFn [toString kind] c s + if s.hasError then + return s + return s.mkUnexpectedTokenError (toString kind) let s := longestMatchFn none ps c s mkResult s iniSz diff --git a/src/Lean/Parser/Types.lean b/src/Lean/Parser/Types.lean index a57f42c604..5484f007a3 100644 --- a/src/Lean/Parser/Types.lean +++ b/src/Lean/Parser/Types.lean @@ -73,6 +73,8 @@ structure ParserContextCore extends InputContext, ParserModuleContext, Cacheable structure ParserContext extends ParserContextCore where private mk :: structure Error where + /-- If set, the corresponding range is reported. The start position is always `ParserState.pos`. -/ + stopPos? : Option String.Pos := none unexpected : String := "" expected : List String := [] deriving Inhabited, BEq @@ -98,7 +100,8 @@ instance : ToString Error where def merge (e₁ e₂ : Error) : Error := match e₂ with - | { unexpected := u, .. } => { unexpected := if u == "" then e₁.unexpected else u, expected := e₁.expected ++ e₂.expected } + -- We expect errors to be merged to be about the same token, so unconditionally copy second `stopPos?` + | { stopPos?, unexpected := u, .. } => { stopPos?, unexpected := if u == "" then e₁.unexpected else u, expected := e₁.expected ++ e₂.expected } end Error @@ -284,6 +287,21 @@ def mkErrorsAt (s : ParserState) (ex : List String) (pos : String.Pos) (initStac def mkErrorAt (s : ParserState) (msg : String) (pos : String.Pos) (initStackSz? : Option Nat := none) : ParserState := s.mkErrorsAt [msg] pos initStackSz? +/-- + Reports given 'expected' messages at range of top stack element (assumed to be a single token). + Replaces the element with `missing`. -/ +def mkUnexpectedTokenErrors (s : ParserState) (ex : List String) : ParserState := + let tk := s.stxStack.back + let s := s.setPos tk.getPos?.get! + let s := s.setError { stopPos? := tk.getTailPos?, expected := ex } + s.popSyntax.pushSyntax .missing + +/-- + Reports given 'expected' message at range of top stack element (assumed to be a single token). + Replaces the element with `missing`. -/ +def mkUnexpectedTokenError (s : ParserState) (msg : String) : ParserState := + s.mkUnexpectedTokenErrors [msg] + def mkUnexpectedErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState := s.setPos pos |>.mkUnexpectedError msg diff --git a/stage0/src/stdlib_flags.h b/stage0/src/stdlib_flags.h index a574103a7f..296e3f9b3a 100644 --- a/stage0/src/stdlib_flags.h +++ b/stage0/src/stdlib_flags.h @@ -6,7 +6,7 @@ options get_default_options() { // see https://leanprover.github.io/lean4/doc/dev/bootstrap.html#further-bootstrapping-complications #if LEAN_IS_STAGE0 == 1 // switch to `true` for ABI-breaking changes affecting meta code - opts = opts.update({"interpreter", "prefer_native"}, false); + opts = opts.update({"interpreter", "prefer_native"}, true); // switch to `true` for changing built-in parsers used in quotations opts = opts.update({"internal", "parseQuotWithCurrentStage"}, false); // toggling `parseQuotWithCurrentStage` may also require toggling the following option if macros/syntax