/-
Copyright (c) 2023 Lean FRO. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.

Implementation of the Lean language: parsing and processing of header and commands, incremental
recompilation

Authors: Sebastian Ullrich
-/

prelude
import Lean.Language.Basic
import Lean.Parser.Module
import Lean.Elab.Command
import Lean.Elab.Import

/-!
# Note [Incremental Parsing]

In the language server, we want to minimize the work we do after each edit by reusing previous state
where possible. This is true for both parsing, i.e. reusing syntax trees without running the parser,
and elaboration. For both, we currently assume that we have to reprocess at least everything from
the point of change downwards. This note is about how to find the correct starting point for
incremental parsing; for elaboration, we then start with the first changed syntax tree.

One initial thought about incremental parsing could be that it's not necessary as parsing is very
fast compared to elaboration; on mathlib we average 41ms parsing per 1000 LoC. But there are quite a
few files >= 1kloc (up to 4.5kloc) in there, so near the end of such files lag from always reparsing
from the beginning may very well be noticeable.

So if we do want incremental parsing, another thought might be that a user edit can only invalidate
commands at or after the location of the change. Unfortunately, that's not true; take the (partial)
input `def a := b private def c`. If we remove the space after `private`, the two commands
syntactically become one with an application of `privatedef` to `b` even though the edit was
strictly after the end of the first command.

So obviously we must include at least the extent of the token that made the parser stop parsing a
command as well such that invalidating the private token invalidates the preceding command.
Unfortunately this is not sufficient either, given the following input:
```
structure a where /-- b -/ @[c]
private axiom d : Nat
```
This is a syntactically valid sequence of a field-less structure and a declaration. If we again
delete the space after private, it becomes a syntactically correct structure with a single field
privateaxiom! So clearly, because of uses of atomic in the grammar, an edit can affect a command
syntax tree even across multiple tokens.

Now, what we do today, and have done since Lean 3, is to always reparse the last command completely
preceding the edit location. If its syntax tree is unchanged, we preserve its data and reprocess all
following commands only, otherwise we reprocess it fully as well. This seems to have worked well so
far but it does seem a bit arbitrary given that even if it works for our current grammar, it can
certainly be extended in ways that break the assumption.

Finally, a more principled and generic solution would be to invalidate a syntax tree when the
parser has reached the edit location during parsing. If it did not, surely the edit cannot have an
effect on the syntax tree in question. Sadly such a "high-water mark" parser position does not exist
currently and likely it could at best be approximated by e.g. "furthest `tokenFn` parse". Thus we
remain at "go two commands up" at this point.
-/

set_option linter.missingDocs true

namespace Lean.Language.Lean
open Lean.Elab
open Lean.Parser

/-- Appends `a` to `as` if `a?` is `some a`; returns `as` unchanged if `a?` is `none`. -/
private def pushOpt (a? : Option α) (as : Array α) : Array α :=
  match a? with
  | some a => as.push a
  | none   => as

/-- Option for capturing output to stderr during elaboration. -/
register_builtin_option stderrAsMessages : Bool := {
  defValue := true
  group    := "server"
  descr    := "(server) capture output to the Lean stderr channel (such as from `dbg_trace`) during elaboration of a command as a diagnostic message"
}

/-- Option for showing elaboration errors from partial syntax trees (i.e. after parser recovery). -/
register_builtin_option showPartialSyntaxErrors : Bool := {
  defValue := false
  descr    := "show elaboration errors from partial syntax trees (i.e. after parser recovery)"
}

/-! The hierarchy of Lean snapshot types -/

/-- Final state of processing of a command. -/
structure CommandFinishedSnapshot extends Snapshot where
  /-- Resulting elaboration state. -/
  cmdState : Command.State
deriving Nonempty
-- A finished command is a leaf of the snapshot tree: it has no child tasks.
instance : ToSnapshotTree CommandFinishedSnapshot where
  toSnapshotTree s := ⟨s.toSnapshot, #[]⟩

/--
State after processing a command's signature and before executing its tactic body, if any. Other
commands should immediately proceed to `finished`. -/
-- TODO: tactics
structure CommandSignatureProcessedSnapshot extends Snapshot where
  /-- State after processing is finished. -/
  finishedSnap : SnapshotTask CommandFinishedSnapshot
deriving Nonempty
-- The only child is the "finished" task, converted to a tree synchronously.
instance : ToSnapshotTree CommandSignatureProcessedSnapshot where
  toSnapshotTree s := ⟨s.toSnapshot, #[s.finishedSnap.map (sync := true) toSnapshotTree]⟩

/-- State after a command has been parsed. -/
structure CommandParsedSnapshotData extends Snapshot where
  /-- Syntax tree of the command. -/
  stx : Syntax
  /-- Resulting parser state. -/
  parserState : Parser.ModuleParserState
  /-- Signature processing task. -/
  sigSnap : SnapshotTask CommandSignatureProcessedSnapshot
deriving Nonempty
/-- State after a command has been parsed. -/
-- workaround for lack of recursive structures
inductive CommandParsedSnapshot where
  /-- Creates a command parsed snapshot. -/
  | mk (data : CommandParsedSnapshotData)
    (nextCmdSnap? : Option (SnapshotTask CommandParsedSnapshot))
deriving Nonempty
/-- The snapshot data. -/
abbrev CommandParsedSnapshot.data : CommandParsedSnapshot → CommandParsedSnapshotData
  | mk data _ => data
/-- Next command, unless this is a terminal command. -/
-- It would be really nice to not make this depend on `sig.finished` where possible
abbrev CommandParsedSnapshot.next? : CommandParsedSnapshot →
    Option (SnapshotTask CommandParsedSnapshot)
  | mk _ next? => next?
-- Children of a parsed command: its signature task, followed by the task of the next command if
-- there is one. `go` recurses along the `next?` chain, hence `partial`.
partial instance : ToSnapshotTree CommandParsedSnapshot where
  toSnapshotTree := go where
    go s := ⟨s.data.toSnapshot,
      #[s.data.sigSnap.map (sync := true) toSnapshotTree] |>
        pushOpt (s.next?.map (·.map (sync := true) go))⟩

/-- Cancels all significant computations from this snapshot onwards. -/
partial def CommandParsedSnapshot.cancel (snap : CommandParsedSnapshot) : BaseIO Unit := do
  -- This is the only relevant computation right now
  -- TODO: cancel additional elaboration tasks if we add them without switching to implicit
  -- cancellation
  snap.data.sigSnap.cancel
  if let some next := snap.next? then
    -- recurse on next command (which may have been spawned just before we cancelled above)
    let _ ← IO.mapTask (sync := true) (·.cancel) next.task

/-- State after successful importing. -/
structure HeaderProcessedState where
  /-- The resulting initial elaboration state. -/
  cmdState : Command.State
  /-- First command task (there is always at least a terminal command). -/
  firstCmdSnap : SnapshotTask CommandParsedSnapshot

/-- State after the module header has been processed including imports. -/
structure HeaderProcessedSnapshot extends Snapshot where
  /-- State after successful importing. -/
  result? : Option HeaderProcessedState
  -- a header snapshot without a result is marked fatal by default
  isFatal := result?.isNone
-- The first command task is the only child, and only present if importing succeeded.
instance : ToSnapshotTree HeaderProcessedSnapshot where
  toSnapshotTree s := ⟨s.toSnapshot, #[] |>
    pushOpt (s.result?.map (·.firstCmdSnap.map (sync := true) toSnapshotTree))⟩

/-- State after successfully parsing the module header. -/
structure HeaderParsedState where
  /-- Resulting parser state. -/
  parserState : Parser.ModuleParserState
  /-- Header processing task. -/
  processedSnap : SnapshotTask HeaderProcessedSnapshot

/-- State after the module header has been parsed. -/
structure HeaderParsedSnapshot extends Snapshot where
  /-- Parser input context supplied by the driver, stored here for incremental parsing. -/
  ictx : Parser.InputContext
  /-- Resulting syntax tree. -/
  stx : Syntax
  /-- State after successful parsing. -/
  result? : Option HeaderParsedState
  -- a header snapshot without a result is marked fatal by default
  isFatal := result?.isNone

-- The header processing task is the only child, and only present if parsing succeeded.
instance : ToSnapshotTree HeaderParsedSnapshot where
  toSnapshotTree s := ⟨s.toSnapshot,
    #[] |> pushOpt (s.result?.map (·.processedSnap.map (sync := true) toSnapshotTree))⟩

/--
Shortcut accessor to the final header state, if successful. Yields an already-resolved `none` if
the header could not be parsed, i.e. if `snap.result?` is `none`. -/
def HeaderParsedSnapshot.processedResult (snap : HeaderParsedSnapshot) :
    SnapshotTask (Option HeaderProcessedState) :=
  snap.result?.bind (·.processedSnap.map (sync := true) (·.result?)) |>.getD (.pure none)

/-- Initial snapshot of the Lean language processor: a "header parsed" snapshot. -/
abbrev InitialSnapshot := HeaderParsedSnapshot

/-- Lean-specific processing context. -/
structure LeanProcessingContext extends ProcessingContext where
  /-- Position of the first file difference if there was a previous invocation. -/
  firstDiffPos? : Option String.Pos

/-- Monad transformer holding all relevant data for Lean processing. -/
abbrev LeanProcessingT m := ReaderT LeanProcessingContext m
/-- Monad holding all relevant data for Lean processing. -/
abbrev LeanProcessingM := LeanProcessingT BaseIO

-- Runs a `BaseIO`-based Lean processing action inside the `IO`-based variant, passing the context
-- through unchanged.
instance : MonadLift LeanProcessingM (LeanProcessingT IO) where
  monadLift := fun act ctx => act ctx

-- Runs a generic processing action inside Lean processing by projecting the context down to its
-- `ProcessingContext` parent.
instance : MonadLift (ProcessingT m) (LeanProcessingT m) where
  monadLift := fun act ctx => act ctx.toProcessingContext

/--
Returns true if there was a previous run and the given position is before any textual change
compared to it.
-/
def isBeforeEditPos (pos : String.Pos) : LeanProcessingM Bool := do
  -- `firstDiffPos?` is `none` on the first run, in which case `any` yields `false`
  return (← read).firstDiffPos?.any (pos < ·)

/--
Adds unexpected exceptions from header processing to the message log as a last resort; standard
errors should already have been caught earlier.

`ex` builds the fallback result from a snapshot whose diagnostics describe the exception; it is only
used if `act` throws. -/
private def withHeaderExceptions (ex : Snapshot → α) (act : LeanProcessingT IO α) :
    LeanProcessingM α := do
  match (← (act (← read)).toBaseIO) with
  | .error e => return ex { diagnostics := (← diagnosticsOfHeaderError e.toString) }
  | .ok a => return a

/--
Entry point of the Lean language processor.

* `setupImports` is called with the header syntax before imports are processed; it returns either
  the options to merge over the context options or a snapshot that is used as the header
  processing result as is. The default returns empty options.
* `old?` is the initial snapshot of the previous run, if any, from which still-valid state is
  reused. -/
/-
General notes:
* For each processing function we pass in the previous state, if any, in order to reuse still-valid
  state. As there is no cheap way to check whether the `Environment` is unchanged, i.e. *semantic*
  change detection is currently not possible, we must make sure to pass `none` as all follow-up
  "previous states" from the first *syntactic* change onwards.
* We must make sure to use `CommandParsedSnapshot.cancel` on such tasks when discarding them, i.e.
  when not passing them along in `old?`.
* Control flow up to finding the last still-valid snapshot (which should be quick) is synchronous so
  as not to report this "fast forwarding" to the user as well as to make sure the next run sees all
  fast-forwarded snapshots without having to wait on tasks.
-/
partial def process
    (setupImports : Syntax → ProcessingT IO (Except HeaderProcessedSnapshot Options) :=
      fun _ => pure <| .ok {})
    (old? : Option InitialSnapshot) : ProcessingM InitialSnapshot := do
  -- compute position of syntactic change once
  let firstDiffPos? := old?.map (·.ictx.input.firstDiffPos (← read).input)
  -- extend the generic processing context with the diff position for all nested functions
  ReaderT.adapt ({ · with firstDiffPos? }) do
    parseHeader old?
where
  -- Parses the module header, reusing `old?` where the header is unaffected by the edit.
  parseHeader (old? : Option HeaderParsedSnapshot) : LeanProcessingM HeaderParsedSnapshot := do
    let ctx ← read
    let ictx := ctx.toInputContext
    let unchanged old :=
      -- when header syntax is unchanged, reuse import processing task as is and continue with
      -- parsing the first command, synchronously if possible
      if let some oldSuccess := old.result? then
        return { old with ictx, result? := some { oldSuccess with
          processedSnap := (← oldSuccess.processedSnap.bindIO (sync := true) fun oldProcessed => do
            if let some oldProcSuccess := oldProcessed.result? then
              -- also wait on old command parse snapshot as parsing is cheap and may allow for
              -- elaboration reuse
              oldProcSuccess.firstCmdSnap.bindIO (sync := true) fun oldCmd =>
                return .pure { oldProcessed with result? := some { oldProcSuccess with
                  firstCmdSnap :=
                    (← parseCmd oldCmd oldSuccess.parserState oldProcSuccess.cmdState ctx) } }
            else
              return .pure oldProcessed) } }
      else return old

    -- fast path: if we have parsed the header successfully...
    if let some old := old? then
      if let some (some processed) ← old.processedResult.get? then
        -- ...and the edit location is after the next command (see note [Incremental Parsing])...
        if let some nextCom ← processed.firstCmdSnap.get? then
          if (← isBeforeEditPos nextCom.data.parserState.pos) then
            -- ...go immediately to next snapshot
            return (← unchanged old)

    -- on an unexpected exception, report it on a snapshot without syntax tree or result
    withHeaderExceptions ({ · with ictx, stx := .missing, result? := none }) do
      -- parsing the header should be cheap enough to do synchronously
      let (stx, parserState, msgLog) ← Parser.parseHeader ictx
      if msgLog.hasErrors then
        return {
          ictx, stx
          diagnostics := (← Snapshot.Diagnostics.ofMessageLog msgLog)
          result? := none
        }

      -- semi-fast path: go to next snapshot if syntax tree is unchanged AND we're still in front
      -- of the edit location
      -- TODO: dropping the second condition would require adjusting positions in the state
      -- NOTE: as `parserState.pos` includes trailing whitespace, this forces reprocessing even if
      -- only that whitespace changes, which is wasteful but still necessary because it may
      -- influence the range of error messages such as from a trailing `exact`
      if let some old := old? then
        if (← isBeforeEditPos parserState.pos) && old.stx == stx then
          return (← unchanged old)
        -- on first change, make sure to cancel all further old tasks
        if let some oldSuccess := old.result? then
          oldSuccess.processedSnap.cancel
          -- the first command task only becomes available once header processing has finished, so
          -- cancel it from a task mapped over the old header processing task
          let _ ← BaseIO.mapTask (t := oldSuccess.processedSnap.task) fun processed => do
            if let some oldProcSuccess := processed.result? then
              let _ ← BaseIO.mapTask (·.cancel) oldProcSuccess.firstCmdSnap.task

      return {
        ictx, stx
        diagnostics := (← Snapshot.Diagnostics.ofMessageLog msgLog)
        result? := some {
          parserState
          processedSnap := (← processHeader stx parserState)
        }
      }

  -- Processes the imports of the header `stx` in a new task and, on success, starts parsing the
  -- first command from `parserState`.
  processHeader (stx : Syntax) (parserState : Parser.ModuleParserState) :
      LeanProcessingM (SnapshotTask HeaderProcessedSnapshot) := do
    let ctx ← read
    SnapshotTask.ofIO ⟨0, ctx.input.endPos⟩ <|
    ReaderT.run (r := ctx) <|  -- re-enter reader in new task
    withHeaderExceptions (α := HeaderProcessedSnapshot) ({ · with result? := none }) do
      -- `setupImports` may abort header processing by returning a snapshot to use as is
      let opts ← match (← setupImports stx) with
        | .ok opts => pure opts
        | .error snap => return snap
      -- override context options with file options
      let opts := ctx.opts.mergeBy (fun _ _ fileOpt => fileOpt) opts
      -- allows `headerEnv` to be leaked, which would live until the end of the process anyway
      let (headerEnv, msgLog) ← Elab.processHeader (leakEnv := true) stx opts .empty
        ctx.toInputContext ctx.trustLevel
      let diagnostics := (← Snapshot.Diagnostics.ofMessageLog msgLog)
      if msgLog.hasErrors then
        return { diagnostics, result? := none }

      let headerEnv := headerEnv.setMainModule ctx.mainModuleName
      let cmdState := Elab.Command.mkState headerEnv msgLog opts
      -- enable info trees and seed them with one tree for the header: a `header` command node
      -- with one `import` command node per argument of `stx[1]`
      let cmdState := { cmdState with infoState := {
        enabled := true
        trees := #[Elab.InfoTree.context (.commandCtx {
          env     := headerEnv
          fileMap := ctx.fileMap
          ngen    := { namePrefix := `_import }
        }) (Elab.InfoTree.node
            (Elab.Info.ofCommandInfo { elaborator := `header, stx })
            (stx[1].getArgs.toList.map (fun importStx =>
              Elab.InfoTree.node (Elab.Info.ofCommandInfo {
                elaborator := `import
                stx := importStx
              }) #[].toPArray'
            )).toPArray'
        )].toPArray'
      }}
      return {
        diagnostics
        infoTree? := cmdState.infoState.trees[0]!
        result? := some {
          cmdState
          firstCmdSnap := (← parseCmd none parserState cmdState)
        }
      }

  -- Parses the command starting at `parserState` in the elaboration state `cmdState`, reusing
  -- `old?` where the command is unaffected by the edit, and chains parsing of the next command.
  parseCmd (old? : Option CommandParsedSnapshot) (parserState : Parser.ModuleParserState)
      (cmdState : Command.State) :
      LeanProcessingM (SnapshotTask CommandParsedSnapshot) := do
    let ctx ← read

    -- check for cancellation, most likely during elaboration of previous command, before starting
    -- processing of next command
    if (← IO.checkCanceled) then
      -- this is a bit ugly as we don't want to adjust our API with `Option`s just for cancellation
      -- (as no-one should look at this result in that case) but anything containing `Environment`
      -- is not `Inhabited`
      return .pure <| .mk (nextCmdSnap? := none) {
        diagnostics := .empty, stx := .missing, parserState
        sigSnap := .pure {
          diagnostics := .empty
          finishedSnap := .pure { diagnostics := .empty, cmdState } } }

    let unchanged old : BaseIO CommandParsedSnapshot :=
      -- when syntax is unchanged, reuse command processing task as is
      if let some oldNext := old.next? then
        return .mk (data := old.data)
          (nextCmdSnap? := (← old.data.sigSnap.bindIO (sync := true) fun oldSig =>
            oldSig.finishedSnap.bindIO (sync := true) fun oldFinished =>
              -- also wait on old command parse snapshot as parsing is cheap and may allow for
              -- elaboration reuse
              oldNext.bindIO (sync := true) fun oldNext => do
                parseCmd oldNext old.data.parserState oldFinished.cmdState ctx))
      else return old  -- terminal command, we're done!

    -- fast path, do not even start new task for this snapshot
    if let some old := old? then
      -- only taken if the old next command snapshot is already available and ends before the edit
      -- (see note [Incremental Parsing])
      if let some nextCom ← old.next?.bindM (·.get?) then
        if (← isBeforeEditPos nextCom.data.parserState.pos) then
          return .pure (← unchanged old)

    SnapshotTask.ofIO ⟨parserState.pos, ctx.input.endPos⟩ do
      let beginPos := parserState.pos
      let scope := cmdState.scopes.head!
      let pmctx := {
        env := cmdState.env, options := scope.opts, currNamespace := scope.currNamespace
        openDecls := scope.openDecls
      }
      -- from here on, `parserState` refers to the state *after* the command
      let (stx, parserState, msgLog) := Parser.parseCommand ctx.toInputContext pmctx parserState
        .empty

      -- semi-fast path
      if let some old := old? then
        if (← isBeforeEditPos parserState.pos ctx) && old.data.stx == stx then
          return (← unchanged old)
        -- on first change, make sure to cancel all further old tasks
        old.cancel

      let sigSnap ← processCmdSignature stx cmdState msgLog.hasErrors beginPos ctx
      let next? ← if Parser.isTerminalCommand stx then pure none
        -- for now, wait on "command finished" snapshot before parsing next command
        else some <$> (sigSnap.bind (·.finishedSnap)).bindIO fun finished =>
          -- `none`: no old state is passed on after the first changed command
          parseCmd none parserState finished.cmdState ctx
      return .mk (nextCmdSnap? := next?) {
        diagnostics := (← Snapshot.Diagnostics.ofMessageLog msgLog ctx.toProcessingContext)
        stx
        parserState
        sigSnap
      }

  -- Elaborates the command `stx` starting at `beginPos` in the state `cmdState`; `hasParseError`
  -- states whether parsing the command produced errors.
  processCmdSignature (stx : Syntax) (cmdState : Command.State) (hasParseError : Bool)
      (beginPos : String.Pos) :
      LeanProcessingM (SnapshotTask CommandSignatureProcessedSnapshot) := do
    let ctx ← read

    -- signature elaboration task; for now, does full elaboration
    -- TODO: do tactic snapshots, reuse old state for them
    SnapshotTask.ofIO (stx.getRange?.getD ⟨beginPos, beginPos⟩) do
      let scope := cmdState.scopes.head!
      -- start from an empty message log so that `cmdState.messages` below only contains messages
      -- of this command
      let cmdStateRef ← IO.mkRef { cmdState with messages := .empty }
      let cmdCtx : Elab.Command.Context := { ctx with cmdPos := beginPos, tacticCache? := none }
      -- collect output written during elaboration; stderr is included depending on the
      -- `stderrAsMessages` option
      let (output, _) ←
        IO.FS.withIsolatedStreams (isolateStderr := stderrAsMessages.get scope.opts) do
          liftM (m := BaseIO) do
            Elab.Command.catchExceptions
              (getResetInfoTrees *> Elab.Command.elabCommandTopLevel stx)
              cmdCtx cmdStateRef
      let cmdState ← cmdStateRef.get
      let mut messages := cmdState.messages
      -- `stx.hasMissing` should imply `hasParseError`, but the latter should be cheaper to check in
      -- general
      if !showPartialSyntaxErrors.get cmdState.scopes[0]!.opts && hasParseError &&
          stx.hasMissing then
        -- discard elaboration errors, except for a few important and unlikely misleading ones, on
        -- parse error
        messages := ⟨messages.msgs.filter fun msg =>
          msg.data.hasTag (fun tag => tag == `Elab.synthPlaceholder ||
            tag == `Tactic.unsolvedGoals || (`_traceMsg).isSuffixOf tag)⟩
      -- report collected output as an information message at the start of the command
      if !output.isEmpty then
        messages := messages.add {
          fileName := ctx.fileName
          severity := MessageSeverity.information
          pos      := ctx.fileMap.toPosition beginPos
          data     := output
        }
      let cmdState := { cmdState with messages }
      -- all diagnostics are reported on the "finished" snapshot; the signature snapshot itself
      -- carries none
      return {
        diagnostics := .empty
        finishedSnap := .pure {
          diagnostics :=
            (← Snapshot.Diagnostics.ofMessageLog cmdState.messages ctx.toProcessingContext)
          infoTree? := some cmdState.infoState.trees[0]!
          cmdState
        }
      }

/--
Waits for and returns final environment, if importing was successful.

Returns `none` if the header could not be parsed or processed. Otherwise blocks on each command
snapshot in turn and returns the environment of the finished state of the last command. -/
partial def waitForFinalEnv? (snap : InitialSnapshot) : Option Environment := do
  let snap ← snap.result?
  let snap ← snap.processedSnap.get.result?
  goCmd snap.firstCmdSnap.get
where goCmd snap :=
  -- follow the `next?` chain to the last command, waiting on each task
  if let some next := snap.next? then
    goCmd next.get
  else
    snap.data.sigSnap.get.finishedSnap.get.cmdState.env

end Lean