perf: cache diagnostics in server

Reason 1: we were making quadratically many pretty-printer calls since each `publishMessages` would format the entire `MessageLog`. Reason 2: we want to avoid formatting each diagnostic twice, once as interactive, and once as plain LSP diagnostic.
2021-08-13 00:12:27 -04:00 · 2021-08-13 00:12:27 -04:00 · f7e9ba76dd
commit f7e9ba76dd
parent c316a8ea69
2 changed files with 101 additions and 83 deletions
--- a/src/Lean/Server/FileWorker.lean
+++ b/src/Lean/Server/FileWorker.lean
@ -51,50 +51,6 @@ open IO
 open Snapshots
 open Std (RBMap RBMap.empty)
 open JsonRpc
-open Widget (publishMessages)
-
-/- Asynchronous snapshot elaboration. -/
-section Elab
-  /-- Elaborates the next command after `parentSnap` and emits diagnostics into `hOut`. -/
-  private def nextCmdSnap (m : DocumentMeta) (parentSnap : Snapshot) (cancelTk : CancelToken) (hOut : FS.Stream) (rpcSesh : RpcSession)
-      : ExceptT ElabTaskError IO Snapshot := do
-    cancelTk.check
-    publishProgressAtPos m parentSnap.endPos hOut
-    let maybeSnap ← compileNextCmd m.text.source parentSnap
-    -- TODO(MH): check for interrupt with increased precision
-    cancelTk.check
-    match maybeSnap with
-    | Sum.inl snap =>
-      /- NOTE(MH): This relies on the client discarding old diagnostics upon receiving new ones
-        while prefering newer versions over old ones. The former is necessary because we do
-        not explicitly clear older diagnostics, while the latter is necessary because we do
-        not guarantee that diagnostics are emitted in order. Specifically, it may happen that
-        we interrupted this elaboration task right at this point and a newer elaboration task
-        emits diagnostics, after which we emit old diagnostics because we did not yet detect
-        the interrupt. Explicitly clearing diagnostics is difficult for a similar reason,
-        because we cannot guarantee that no further diagnostics are emitted after clearing
-        them. -/
-      if snap.msgLog.msgs.size > parentSnap.msgLog.msgs.size then
-        publishMessages m snap.msgLog hOut rpcSesh
-      snap
-    | Sum.inr msgLog =>
-      publishMessages m msgLog hOut rpcSesh
-      publishProgressDone m hOut
-      throw ElabTaskError.eof
-
-  /-- Elaborates all commands after `initSnap`, emitting the diagnostics into `hOut`. -/
-  def unfoldCmdSnaps (m : DocumentMeta) (initSnap : Snapshot) (cancelTk : CancelToken) (hOut : FS.Stream) (rpcSesh : RpcSession)
-    (initial : Bool) :
-      IO (AsyncList ElabTaskError Snapshot) := do
-    if initial && initSnap.msgLog.hasErrors then
-      -- treat header processing errors as fatal so users aren't swamped with followup errors
-      AsyncList.nil
-    else
-      AsyncList.unfoldAsync (nextCmdSnap m . cancelTk hOut rpcSesh) initSnap
-end Elab
-
-- Pending requests are tracked so they can be cancelled
-abbrev PendingRequestMap := RBMap RequestID (Task (Except IO.Error Unit)) compare

 structure WorkerContext where
  hIn           : FS.Stream
@ -102,6 +58,49 @@ structure WorkerContext where
  hLog          : FS.Stream
  srcSearchPath : SearchPath

+/- Asynchronous snapshot elaboration. -/
+section Elab
+  abbrev AsyncElabM := ExceptT ElabTaskError (ReaderT WorkerContext IO)
+
+  /-- Elaborates the next command after `parentSnap` and emits diagnostics into `hOut`. -/
+  private def nextCmdSnap (m : DocumentMeta) (parentSnap : Snapshot) (cancelTk : CancelToken)
+      : AsyncElabM Snapshot := do
+    cancelTk.check
+    let hOut := (←read).hOut
+    if parentSnap.isAtEnd then
+      publishDiagnostics m parentSnap.diagnostics.toArray hOut
+      publishProgressDone m hOut
+      throw ElabTaskError.eof
+    publishProgressAtPos m parentSnap.endPos hOut
+    let snap ← compileNextCmd m.text parentSnap
+    -- TODO(MH): check for interrupt with increased precision
+    cancelTk.check
+    /- NOTE(MH): This relies on the client discarding old diagnostics upon receiving new ones
+      while prefering newer versions over old ones. The former is necessary because we do
+      not explicitly clear older diagnostics, while the latter is necessary because we do
+      not guarantee that diagnostics are emitted in order. Specifically, it may happen that
+      we interrupted this elaboration task right at this point and a newer elaboration task
+      emits diagnostics, after which we emit old diagnostics because we did not yet detect
+      the interrupt. Explicitly clearing diagnostics is difficult for a similar reason,
+      because we cannot guarantee that no further diagnostics are emitted after clearing
+      them. -/
+    if snap.interactiveDiags.size > parentSnap.interactiveDiags.size then
+      publishDiagnostics m snap.diagnostics.toArray hOut
+    return snap
+
+  /-- Elaborates all commands after `initSnap`, emitting the diagnostics into `hOut`. -/
+  def unfoldCmdSnaps (m : DocumentMeta) (initSnap : Snapshot) (cancelTk : CancelToken) (initial : Bool)
+      : ReaderT WorkerContext IO (AsyncList ElabTaskError Snapshot) := do
+    if initial && initSnap.msgLog.hasErrors then
+      -- treat header processing errors as fatal so users aren't swamped with followup errors
+      AsyncList.nil
+    else
+      AsyncList.unfoldAsync (nextCmdSnap m . cancelTk (← read)) initSnap
+end Elab
+
+-- Pending requests are tracked so they can be cancelled
+abbrev PendingRequestMap := RBMap RequestID (Task (Except IO.Error Unit)) compare
+
 structure WorkerState where
  doc             : EditableDocument
  pendingRequests : PendingRequestMap
@ -170,7 +169,6 @@ section Initialization
    catch e =>  -- should be from `leanpkg print-paths`
      let msgs := MessageLog.empty.add { fileName := "<ignored>", pos := ⟨0, 0⟩, data := e.toString }
      pure (← mkEmptyEnvironment, msgs)
-    publishMessages m msgLog hOut rpcSesh
    let cmdState := Elab.Command.mkState headerEnv msgLog opts
    let cmdState := { cmdState with infoState.enabled := true, scopes := [{ header := "", opts := opts }] }
    let headerSnap := {
@ -178,7 +176,9 @@ section Initialization
      stx := headerStx
      mpState := headerParserState
      cmdState := cmdState
+      interactiveDiags := ← cmdState.messages.msgs.mapM (Widget.msgToInteractiveDiagnostic m.text)
    }
+    publishDiagnostics m headerSnap.diagnostics.toArray hOut
    return (headerSnap, srcSearchPath)

  def initializeWorker (meta : DocumentMeta) (i o e : FS.Stream)
@ -186,16 +186,16 @@ section Initialization
    let rpcSesh ← RpcSession.new false
    let (headerSnap, srcSearchPath) ← compileHeader meta o rpcSesh
    let cancelTk ← CancelToken.new
-    let cmdSnaps ← unfoldCmdSnaps meta headerSnap cancelTk o (initial := true) rpcSesh
+    let ctx :=
+      { hIn                := i
+        hOut               := o
+        hLog               := e
+        srcSearchPath      := srcSearchPath
+      }
+    let cmdSnaps ← unfoldCmdSnaps meta headerSnap cancelTk (initial := true) ctx
    let doc : EditableDocument := ⟨meta, headerSnap, cmdSnaps, cancelTk⟩
-    return ({
-      hIn                := i
-      hOut               := o
-      hLog               := e
-      srcSearchPath      := srcSearchPath
-    },
-    {
-      doc             := doc
+    return (ctx,
+    { doc             := doc
      pendingRequests := RBMap.empty
      rpcSesh
    })
@ -230,7 +230,7 @@ section Updates
      let mut validSnaps := cmdSnaps.finishedPrefix.takeWhile (fun s => s.endPos < changePos)
      if validSnaps.length = 0 then
        let cancelTk ← CancelToken.new
-        let newCmdSnaps ← unfoldCmdSnaps newMeta newHeaderSnap cancelTk ctx.hOut (initial := true) (← get).rpcSesh
+        let newCmdSnaps ← unfoldCmdSnaps newMeta newHeaderSnap cancelTk (initial := true) ctx
        modify fun st => { st with doc := ⟨newMeta, newHeaderSnap, newCmdSnaps, cancelTk⟩ }
      else
        /- When at least one valid non-header snap exists, it may happen that a change does not fall
@ -248,7 +248,7 @@ section Updates
          validSnaps ← validSnaps.dropLast
          lastSnap ← preLastSnap
        let cancelTk ← CancelToken.new
-        let newSnaps ← unfoldCmdSnaps newMeta lastSnap cancelTk ctx.hOut (initial := false) (← get).rpcSesh
+        let newSnaps ← unfoldCmdSnaps newMeta lastSnap cancelTk (initial := false) ctx
        let newCmdSnaps := AsyncList.ofList validSnaps ++ newSnaps
        modify fun st => { st with doc := ⟨newMeta, newHeaderSnap, newCmdSnaps, cancelTk⟩ }
 end Updates
--- a/src/Lean/Server/Snapshots.lean
+++ b/src/Lean/Server/Snapshots.lean
@ -9,6 +9,8 @@ import Init.System.IO
 import Lean.Elab.Import
 import Lean.Elab.Command

+import Lean.Widget.InteractiveDiagnostics
+
 /-! One can think of this module as being a partial reimplementation
 of Lean.Elab.Frontend which also stores a snapshot of the world after
 each command. Importantly, we allow (re)starting compilation from any
@ -20,7 +22,7 @@ open Elab

 /-- What Lean knows about the world after the header and each command. -/
 structure Snapshot where
-  /- Where the command which produced this snapshot begins. Note that
+  /-- Where the command which produced this snapshot begins. Note that
  neighbouring snapshots are *not* necessarily attached beginning-to-end,
  since inputs outside the grammar advance the parser but do not produce
  snapshots. -/
@ -28,6 +30,10 @@ structure Snapshot where
  stx : Syntax
  mpState : Parser.ModuleParserState
  cmdState : Command.State
+  /-- We cache interactive diagnostics in order not to invoke the pretty-printer again on messages
+  from previous snapshots when publishing diagnostics for every new snapshot (this is quadratic),
+  as well as not to invoke it once again when handling `$/lean/interactiveDiagnostics`. -/
+  interactiveDiags : Std.PersistentArray Widget.InteractiveDiagnostic
  deriving Inhabited

 namespace Snapshot
@ -41,6 +47,12 @@ def env (s : Snapshot) : Environment :=
 def msgLog (s : Snapshot) : MessageLog :=
  s.cmdState.messages

+def diagnostics (s : Snapshot) : Std.PersistentArray Lsp.Diagnostic :=
+  s.interactiveDiags.map fun d => d.toDiagnostic
+
+def isAtEnd (s : Snapshot) : Bool :=
+  Parser.isEOI s.stx || Parser.isExitCommand s.stx
+
 end Snapshot

 /-- Reparses the header syntax but does not re-elaborate it. Used to ignore whitespace-only changes. -/
@ -80,14 +92,13 @@ partial def parseAhead (contents : String) (snap : Snapshot) : IO (Array Syntax)
      else
        go inputCtx pmctx cmdParserState (stxs.push cmdStx)

-/-- Compiles the next command occurring after the given snapshot.
-If there is no next command (file ended), returns messages produced
-through the file. -/
+/-- Compiles the next command occurring after the given snapshot. If there is no next command
+(file ended), `Snapshot.isAtEnd` will hold of the return value. -/
 -- NOTE: This code is really very similar to Elab.Frontend. But generalizing it
 -- over "store snapshots"/"don't store snapshots" would likely result in confusing
 -- isServer? conditionals and not be worth it due to how short it is.
-def compileNextCmd (contents : String) (snap : Snapshot) : IO (Sum Snapshot MessageLog) := do
-  let inputCtx := Parser.mkInputContext contents "<input>"
+def compileNextCmd (text : FileMap) (snap : Snapshot) : IO Snapshot := do
+  let inputCtx := Parser.mkInputContext text.source "<input>"
  let cmdState := snap.cmdState
  let scope := cmdState.scopes.head!
  let pmctx := { env := cmdState.env, options := scope.opts, currNamespace := scope.currNamespace, openDecls := scope.openDecls }
@ -95,11 +106,18 @@ def compileNextCmd (contents : String) (snap : Snapshot) : IO (Sum Snapshot Mess
    Parser.parseCommand inputCtx pmctx snap.mpState snap.msgLog
  let cmdPos := cmdStx.getPos?.get!
  if Parser.isEOI cmdStx || Parser.isExitCommand cmdStx then
-    Sum.inr msgLog
+    let endSnap : Snapshot :=
+      { beginPos := cmdPos
+        stx := cmdStx
+        mpState := cmdParserState
+        cmdState := snap.cmdState
+        interactiveDiags := ← newInteractiveDiags msgLog
+      }
+    endSnap
  else
    let cmdStateRef ← IO.mkRef { snap.cmdState with messages := msgLog }
-    let cmdCtx : Elab.Command.Context := {
-        cmdPos   := snap.endPos
+    let cmdCtx : Elab.Command.Context :=
+      { cmdPos   := snap.endPos
        fileName := inputCtx.fileName
        fileMap  := inputCtx.fileMap
      }
@ -112,29 +130,29 @@ def compileNextCmd (contents : String) (snap : Snapshot) : IO (Sum Snapshot Mess
    if !output.isEmpty then
      postCmdState := {
        postCmdState with
-        messages := postCmdState.messages.add {
-          fileName := inputCtx.fileName
-          severity := MessageSeverity.information
-          pos      := inputCtx.fileMap.toPosition snap.endPos
-          data     := output
-        }
+        messages := postCmdState.messages.add
+          { fileName := inputCtx.fileName
+            severity := MessageSeverity.information
+            pos      := inputCtx.fileMap.toPosition snap.endPos
+            data     := output
+          }
      }
-    let postCmdSnap : Snapshot := {
-        beginPos := cmdPos
+    let postCmdSnap : Snapshot :=
+      { beginPos := cmdPos
        stx := cmdStx
        mpState := cmdParserState
        cmdState := postCmdState
+        interactiveDiags := ← newInteractiveDiags postCmdState.messages
      }
-    Sum.inl postCmdSnap
+    postCmdSnap

-/-- Compiles all commands after the given snapshot. Returns them as a list, together with
-the final message log. -/
-partial def compileCmdsAfter (contents : String) (snap : Snapshot) : IO (List Snapshot × MessageLog) := do
-  let cmdOut ← compileNextCmd contents snap
-  match cmdOut with
-  | Sum.inl snap =>
-    let (snaps, msgLog) ← compileCmdsAfter contents snap
-    (snap :: snaps, msgLog)
-  | Sum.inr msgLog => ([], msgLog)
+where
+  newInteractiveDiags (msgLog : MessageLog) : IO (Std.PersistentArray Widget.InteractiveDiagnostic) := do
+    let newMsgCount := msgLog.msgs.size - snap.msgLog.msgs.size
+    let mut ret := snap.interactiveDiags
+    for i in List.iota newMsgCount do
+      let newMsg := msgLog.msgs.get! (msgLog.msgs.size - i)
+      ret := ret.push (← Widget.msgToInteractiveDiagnostic text newMsg)
+    return ret

 end Lean.Server.Snapshots