From a0440ea4ea4a760c8a3427e86e679c6a8b3a0dde Mon Sep 17 00:00:00 2001 From: tydeu Date: Tue, 22 Aug 2023 18:03:34 -0400 Subject: [PATCH] feat: lake: cache built file hashes --- src/lake/Lake/Build/Common.lean | 49 ++++++++++++++++++++++++-------- src/lake/Lake/Build/Context.lean | 22 ++++++++++++-- src/lake/Lake/Build/Module.lean | 9 ++++-- src/lake/Lake/Build/Monad.lean | 26 +++++++---------- src/lake/Lake/CLI/Actions.lean | 4 +-- src/lake/Lake/CLI/Help.lean | 1 + src/lake/Lake/CLI/Main.lean | 23 +++++++++++---- src/lake/Lake/CLI/Serve.lean | 10 +++---- 8 files changed, 97 insertions(+), 47 deletions(-) diff --git a/src/lake/Lake/Build/Common.lean b/src/lake/Lake/Build/Common.lean index c0e3b2aa1d..8842de1da2 100644 --- a/src/lake/Lake/Build/Common.lean +++ b/src/lake/Lake/Build/Common.lean @@ -14,24 +14,49 @@ namespace Lake def inputFile (path : FilePath) : SchedulerM (BuildJob FilePath) := Job.async <| (path, ·) <$> computeTrace path -@[specialize] def buildUnlessUpToDate [CheckExists ι] [GetMTime ι] (info : ι) +/-- Check if the `info` is up-to-date by comparing `depTrace` with `traceFile`. -/ +@[specialize] def BuildTrace.checkUpToDate [CheckExists ι] [GetMTime ι] +(info : ι) (depTrace : BuildTrace) (traceFile : FilePath) : JobM Bool := do + if (← getIsOldMode) then + depTrace.checkAgainstTime info + else + depTrace.checkAgainstFile info traceFile + +@[inline] def buildUnlessUpToDate [CheckExists ι] [GetMTime ι] (info : ι) (depTrace : BuildTrace) (traceFile : FilePath) (build : JobM PUnit) : JobM PUnit := do - let isOldMode ← getIsOldMode - let upToDate ← - if isOldMode then - depTrace.checkAgainstTime info - else - depTrace.checkAgainstFile info traceFile - unless upToDate do + unless (← depTrace.checkUpToDate info traceFile) do build - unless isOldMode do - depTrace.writeToFile traceFile + depTrace.writeToFile traceFile + +/-- Fetch the trace of a file that may have its hash already cached in a `.hash` file. -/ +def fetchFileTrace (file : FilePath) : BuildM BuildTrace := do + if (← getTrustHash) then + let hashFile := FilePath.mk <| file.toString ++ ".hash" + if let some hash ← Hash.load? hashFile then + return .mk hash (← getMTime file) + else + let hash ← computeHash file + IO.FS.writeFile hashFile hash.toString + return .mk hash (← getMTime file) + else + computeTrace file + +/-- Compute the hash of a file and save it to a `.hash` file. -/ +def cacheFileHash (file : FilePath) : IO Hash := do + let hash ← computeHash file + let hashFile := FilePath.mk <| file.toString ++ ".hash" + IO.FS.writeFile hashFile hash.toString + return hash def buildFileUnlessUpToDate (file : FilePath) (depTrace : BuildTrace) (build : BuildM PUnit) : BuildM BuildTrace := do let traceFile := FilePath.mk <| file.toString ++ ".trace" - buildUnlessUpToDate file depTrace traceFile build - computeTrace file + if (← depTrace.checkUpToDate file traceFile) then + fetchFileTrace file + else + build + depTrace.writeToFile traceFile + return .mk (← cacheFileHash file) (← getMTime file) @[inline] def buildFileAfterDep (file : FilePath) (dep : BuildJob α) (build : α → BuildM PUnit) diff --git a/src/lake/Lake/Build/Context.lean b/src/lake/Lake/Build/Context.lean index 8885a9957a..a775c2d2ee 100644 --- a/src/lake/Lake/Build/Context.lean +++ b/src/lake/Lake/Build/Context.lean @@ -15,16 +15,32 @@ import Lake.Build.Topological open System namespace Lake -/-- A Lake context with some additional caching for builds. -/ -structure BuildContext extends Context where - leanTrace : BuildTrace +/-- Configuration options for a Lake build. -/ +structure BuildConfig where oldMode : Bool := false + trustHash : Bool := true + +/-- A Lake context with a build configuration and additional build data. -/ +structure BuildContext extends BuildConfig, Context where + leanTrace : BuildTrace startedBuilds : IO.Ref Nat finishedBuilds : IO.Ref Nat /-- A transformer to equip a monad with a `BuildContext`. -/ abbrev BuildT := ReaderT BuildContext +@[inline] def getLeanTrace [Monad m] : BuildT m BuildTrace := + (·.leanTrace) <$> readThe BuildContext + +@[inline] def getBuildConfig [Monad m] : BuildT m BuildConfig := + (·.toBuildConfig) <$> readThe BuildContext + +@[inline] def getIsOldMode [Monad m] : BuildT m Bool := + (·.oldMode) <$> getBuildConfig + +@[inline] def getTrustHash [Monad m] : BuildT m Bool := + (·.trustHash) <$> getBuildConfig + /-- The monad for the Lake build manager. -/ abbrev SchedulerM := BuildT <| LogT BaseIO diff --git a/src/lake/Lake/Build/Module.lean b/src/lake/Lake/Build/Module.lean index 35cafbe7ee..271aa6edcf 100644 --- a/src/lake/Lake/Build/Module.lean +++ b/src/lake/Lake/Build/Module.lean @@ -141,6 +141,9 @@ def Module.recBuildLean (mod : Module) : IndexBuildM (BuildJob Unit) := do buildUnlessUpToDate mod modTrace mod.traceFile do compileLeanModule mod.name.toString mod.leanFile mod.oleanFile mod.ileanFile mod.cFile (← getLeanPath) mod.rootDir dynlibs dynlibPath (mod.leanArgs ++ mod.weakLeanArgs) (← getLean) + discard <| cacheFileHash mod.oleanFile + discard <| cacheFileHash mod.ileanFile + discard <| cacheFileHash mod.cFile return ((), depTrace) /-- The `ModuleFacetConfig` for the builtin `leanArtsFacet`. -/ @@ -151,20 +154,20 @@ def Module.leanArtsFacetConfig : ModuleFacetConfig leanArtsFacet := def Module.oleanFacetConfig : ModuleFacetConfig oleanFacet := mkFacetJobConfigSmall fun mod => do (← mod.leanArts.fetch).bindSync fun _ depTrace => - return (mod.oleanFile, mixTrace (← computeTrace mod.oleanFile) depTrace) + return (mod.oleanFile, mixTrace (← fetchFileTrace mod.oleanFile) depTrace) /-- The `ModuleFacetConfig` for the builtin `ileanFacet`. -/ def Module.ileanFacetConfig : ModuleFacetConfig ileanFacet := mkFacetJobConfigSmall fun mod => do (← mod.leanArts.fetch).bindSync fun _ depTrace => - return (mod.ileanFile, mixTrace (← computeTrace mod.ileanFile) depTrace) + return (mod.ileanFile, mixTrace (← fetchFileTrace mod.ileanFile) depTrace) /-- The `ModuleFacetConfig` for the builtin `cFacet`. -/ def Module.cFacetConfig : ModuleFacetConfig cFacet := mkFacetJobConfigSmall fun mod => do (← mod.leanArts.fetch).bindSync fun _ _ => -- do content-aware hashing so that we avoid recompiling unchanged C files - return (mod.cFile, ← computeTrace mod.cFile) + return (mod.cFile, ← fetchFileTrace mod.cFile) /-- Recursively build the module's object file from its C file produced by `lean`. -/ def Module.recBuildLeanO (self : Module) : IndexBuildM (BuildJob FilePath) := do diff --git a/src/lake/Lake/Build/Monad.lean b/src/lake/Lake/Build/Monad.lean index ab335aece2..e68eaff900 100644 --- a/src/lake/Lake/Build/Monad.lean +++ b/src/lake/Lake/Build/Monad.lean @@ -11,21 +11,15 @@ open System namespace Lake -def mkBuildContext (ws : Workspace) (oldMode : Bool) : IO BuildContext := do - let lean := ws.lakeEnv.lean - let leanTrace := Hash.ofString lean.githash +def mkBuildContext (ws : Workspace) (config : BuildConfig) : IO BuildContext := do return { - opaqueWs := ws, leanTrace, oldMode + opaqueWs := ws, + toBuildConfig := config, startedBuilds := ← IO.mkRef 0 - finishedBuilds := ← IO.mkRef 0 + finishedBuilds := ← IO.mkRef 0, + leanTrace := Hash.ofString ws.lakeEnv.lean.githash } -@[inline] def getLeanTrace : BuildM BuildTrace := - (·.leanTrace) <$> readThe BuildContext - -@[inline] def getIsOldMode : BuildM Bool := - (·.oldMode) <$> readThe BuildContext - def failOnBuildCycle [ToString k] : Except (List k) α → BuildM α | Except.ok a => pure a | Except.error cycle => do @@ -92,10 +86,10 @@ where self.root.buildDir / lockFileName /-- Run the given build function in the Workspace's context. -/ -@[inline] def Workspace.runBuild (ws : Workspace) (build : BuildM α) (oldMode := false) : LogIO α := do - let ctx ← mkBuildContext ws oldMode +@[inline] def Workspace.runBuild (ws : Workspace) (build : BuildM α) (config : BuildConfig := {}) : LogIO α := do + let ctx ← mkBuildContext ws config /- - TODO: + TODO: The lock file has been temporarily disabled (by lean4#2445) until we have an API for catching `Ctrl-C` during a build. Absent this, the lock file was too disruptive for users. @@ -104,5 +98,5 @@ where build.run ctx /-- Run the given build function in the Lake monad's workspace. -/ -@[inline] def runBuild (build : BuildM α) (oldMode := false) : LakeT LogIO α := do - (← getWorkspace).runBuild build oldMode +@[inline] def runBuild (build : BuildM α) (config : BuildConfig := {}) : LakeT LogIO α := do + (← getWorkspace).runBuild build config diff --git a/src/lake/Lake/CLI/Actions.lean b/src/lake/Lake/CLI/Actions.lean index c638330a6c..bd99ea6c12 100644 --- a/src/lake/Lake/CLI/Actions.lean +++ b/src/lake/Lake/CLI/Actions.lean @@ -10,10 +10,10 @@ namespace Lake def env (cmd : String) (args : Array String := #[]) : LakeT IO UInt32 := do IO.Process.spawn {cmd, args, env := ← getAugmentedEnv} >>= (·.wait) -def exe (name : Name) (args : Array String := #[]) (oldMode := false) : LakeT LogIO UInt32 := do +def exe (name : Name) (args : Array String := #[]) (buildConfig : BuildConfig := {}) : LakeT LogIO UInt32 := do let ws ← getWorkspace if let some exe := ws.findLeanExe? name then - let exeFile ← ws.runBuild (exe.build >>= (·.await)) oldMode + let exeFile ← ws.runBuild (exe.build >>= (·.await)) buildConfig env exeFile.toString args else error s!"unknown executable `{name}`" diff --git a/src/lake/Lake/CLI/Help.lean b/src/lake/Lake/CLI/Help.lean index b87912a667..df7bcf23bf 100644 --- a/src/lake/Lake/CLI/Help.lean +++ b/src/lake/Lake/CLI/Help.lean @@ -23,6 +23,7 @@ OPTIONS: --lean=cmd specify the `lean` command used by Lake -K key[=value] set the configuration file option named key --old only rebuild modified modules (ignore transitive deps) + --rehash, -H hash all files for traces (do not trust `.hash` files) --update, -U update manifest before building COMMANDS: diff --git a/src/lake/Lake/CLI/Main.lean b/src/lake/Lake/CLI/Main.lean index 6696356632..5f03d65e19 100644 --- a/src/lake/Lake/CLI/Main.lean +++ b/src/lake/Lake/CLI/Main.lean @@ -33,8 +33,9 @@ structure LakeOptions where subArgs : List String := [] wantsHelp : Bool := false verbosity : Verbosity := .normal - oldMode : Bool := false updateDeps : Bool := false + oldMode : Bool := false + trustHash : Bool := true /-- Get the Lean installation. Error if missing. -/ def LakeOptions.getLeanInstall (opts : LakeOptions) : Except CliError LeanInstall := @@ -66,7 +67,12 @@ def LakeOptions.mkLoadConfig (opts : LakeOptions) : EIO CliError LoadConfig := leanOpts := Lean.Options.empty } -export LakeOptions (mkLoadConfig) +/-- Make a `BuildConfig` from a `LakeOptions`. -/ +def LakeOptions.mkBuildConfig (opts : LakeOptions) : BuildConfig where + oldMode := opts.oldMode + trustHash := opts.trustHash + +export LakeOptions (mkLoadConfig mkBuildConfig) /-! ## Monad -/ @@ -134,6 +140,7 @@ def lakeShortOption : (opt : Char) → CliM PUnit | 'K' => do setConfigOpt <| ← takeOptArg "-K" "key-value pair" | 'U' => modifyThe LakeOptions ({· with updateDeps := true}) | 'h' => modifyThe LakeOptions ({· with wantsHelp := true}) +| 'H' => modifyThe LakeOptions ({· with trustHash := false}) | opt => throw <| CliError.unknownShortOption opt def lakeLongOption : (opt : String) → CliM PUnit @@ -141,6 +148,7 @@ def lakeLongOption : (opt : String) → CliM PUnit | "--verbose" => modifyThe LakeOptions ({· with verbosity := .verbose}) | "--update" => modifyThe LakeOptions ({· with updateDeps := true}) | "--old" => modifyThe LakeOptions ({· with oldMode := true}) +| "--rehash" => modifyThe LakeOptions ({· with trustHash := false}) | "--dir" => do let rootDir ← takeOptArg "--dir" "path"; modifyThe LakeOptions ({· with rootDir}) | "--file" => do let configFile ← takeOptArg "--file" "path"; modifyThe LakeOptions ({· with configFile}) | "--lean" => do setLean <| ← takeOptArg "--lean" "path or command" @@ -265,7 +273,8 @@ protected def build : CliM PUnit := do let ws ← loadWorkspace config opts.updateDeps let targetSpecs ← takeArgs let specs ← parseTargetSpecs ws targetSpecs - ws.runBuild (buildSpecs specs) opts.oldMode |>.run (MonadLog.io opts.verbosity) + let buildConfig := mkBuildConfig opts + ws.runBuild (buildSpecs specs) buildConfig |>.run (MonadLog.io opts.verbosity) protected def resolveDeps : CliM PUnit := do processOptions lakeOption @@ -293,8 +302,9 @@ protected def upload : CliM PUnit := do protected def printPaths : CliM PUnit := do processOptions lakeOption let opts ← getThe LakeOptions - let config ← mkLoadConfig opts - printPaths config (← takeArgs) opts.oldMode opts.verbosity + let loadConfig ← mkLoadConfig opts + let buildConfig := mkBuildConfig opts + printPaths loadConfig (← takeArgs) buildConfig opts.verbosity protected def clean : CliM PUnit := do processOptions lakeOption @@ -349,7 +359,8 @@ protected def exe : CliM PUnit := do let config ← mkLoadConfig opts let ws ← loadWorkspace config let ctx := mkLakeContext ws - exit <| ← (exe exeName args.toArray opts.oldMode).run ctx + let buildConfig := mkBuildConfig opts + exit <| ← (exe exeName args.toArray buildConfig).run ctx protected def selfCheck : CliM PUnit := do processOptions lakeOption diff --git a/src/lake/Lake/CLI/Serve.lean b/src/lake/Lake/CLI/Serve.lean index 12fb221034..ed0e1c204a 100644 --- a/src/lake/Lake/CLI/Serve.lean +++ b/src/lake/Lake/CLI/Serve.lean @@ -26,15 +26,15 @@ If no configuration file exists, exit silently with `noConfigFileCode` (i.e, 2). The `print-paths` command is used internally by Lean 4 server. -/ -def printPaths (config : LoadConfig) (imports : List String := []) -(oldMode : Bool := false) (verbosity : Verbosity := .normal) : MainM PUnit := do - if (← config.configFile.pathExists) then +def printPaths (loadConfig : LoadConfig) (imports : List String := []) +(buildConfig : BuildConfig := {}) (verbosity : Verbosity := .normal) : MainM PUnit := do + if (← loadConfig.configFile.pathExists) then if let some errLog := (← IO.getEnv invalidConfigEnvVar) then IO.eprint errLog IO.eprintln s!"Invalid Lake configuration. Please restart the server after fixing the Lake configuration file." exit 1 - let ws ← MainM.runLogIO (loadWorkspace config) verbosity - let dynlibs ← ws.runBuild (buildImportsAndDeps imports) oldMode + let ws ← MainM.runLogIO (loadWorkspace loadConfig) verbosity + let dynlibs ← ws.runBuild (buildImportsAndDeps imports) buildConfig |>.run (MonadLog.eio verbosity) IO.println <| Json.compress <| toJson { oleanPath := ws.leanPath