From 4384344465511764a9cf70aff0cdf5dd98e9a12c Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Wed, 4 Mar 2026 23:53:59 -0500 Subject: [PATCH] feat: lake: use trace mtime for arts when possible (#12799) This PR changes Lake to use the modification times of traces (where available) for artifact modification times. When artifacts are hard-linked from the cache, they retain the modification time of the artifact in the cache. Thus, the artifact modification time is an unreliable metric for determining whether an artifact is up-to-date relative to other artifacts in the presence of the cache. The trace file, however, is modified consistently when the artifacts are updated, making it the most reliable indicator of modification time. --- src/lake/Lake/Build/Common.lean | 75 +++++++++++++++++------------- src/lake/Lake/Build/Module.lean | 53 ++++++++++++++++++++- src/lake/Lake/Config/Artifact.lean | 2 +- src/lake/Lake/Config/Cache.lean | 25 +++++----- src/lake/Lake/Config/Module.lean | 17 ------- src/lake/Lake/Config/Monad.lean | 24 ++++++---- 6 files changed, 123 insertions(+), 73 deletions(-) diff --git a/src/lake/Lake/Build/Common.lean b/src/lake/Lake/Build/Common.lean index 490da1e5e1..c201cab89b 100644 --- a/src/lake/Lake/Build/Common.lean +++ b/src/lake/Lake/Build/Common.lean @@ -227,13 +227,15 @@ Checks if the `info` is up-to-date by comparing `depTrace` with `depHash`. If old mode is enabled (e.g., `--old`), uses the `oldTrace` modification time as the point of comparison instead. -/ -@[inline] public def checkHashUpToDate +@[inline, deprecated "Deprecated without replacement." (since := "2025-03-04")] +public def checkHashUpToDate [CheckExists ι] [GetMTime ι] (info : ι) (depTrace : BuildTrace) (depHash : Option Hash) (oldTrace := depTrace.mtime) : JobM Bool := (·.isUpToDate) <$> checkHashUpToDate' info depTrace depHash oldTrace /-- +**Ror internal use only.** Checks whether `info` is up-to-date with the trace. If so, replays the log of the trace if available. -/ @@ -265,7 +267,7 @@ If so, replays the log of the trace if available. : JobM Bool := (·.isUpToDate) <$> savedTrace.replayIfUpToDate' info depTrace oldTrace /-- -Returns if the saved trace exists and its hash matches `inputHash`. +Returns `true` if the saved trace exists and its hash matches `inputHash`. If up-to-date, replays the saved log from the trace and sets the current build action to `replay`. Otherwise, if the log is empty and trace is synthetic, @@ -540,36 +542,35 @@ public def resolveArtifact : JobM Artifact := do let ws ← getWorkspace let path := ws.lakeCache.artifactDir / descr.relPath - if let some art ← computeArtifact? descr path then - return art - else if let some service := service? then - if let some service := ws.findCacheService? service.toString then - let some scope := scope? - | error s!"artifact with associated cache service but no scope" - let url := service.artifactUrl descr.hash scope - logVerbose s!"\ - downloaded artifact {descr.hash}\ - \n local path: {path}\ - \n remote URL: {url}" - liftM <| downloadArtifactCore descr.hash url path - let r := {read := true, write := false, execution := exe} - if let .error e ← IO.setAccessRights path ⟨r, r, r⟩ |>.toBaseIO then - logWarning s!"could not mark downloaded artifact read-only: {e}" - let some art ← computeArtifact? descr path - | error s!"downloaded succeeded, but artifact failed to resolve:\n {path}" - return art + match (← getMTime path |>.toBaseIO) with + | .ok mtime => + return {descr, path, mtime} + | .error (.noFileOrDirectory ..) => + -- we redownload artifacts on any error + if let some service := service? then + if let some service := ws.findCacheService? service.toString then + let some scope := scope? + | error s!"artifact with associated cache service but no scope" + let url := service.artifactUrl descr.hash scope + logVerbose s!"\ + downloaded artifact {descr.hash}\ + \n local path: {path}\ + \n remote URL: {url}" + liftM <| downloadArtifactCore descr.hash url path + let r := {read := true, write := false, execution := exe} + if let .error e ← IO.setAccessRights path ⟨r, r, r⟩ |>.toBaseIO then + logWarning s!"could not mark downloaded artifact read-only: {e}" + match (← getMTime path |>.toBaseIO) with + | .ok mtime => + return {descr, path, mtime} + | .error e => + error s!"downloaded succeeded, but artifact failed to resolve: {e}" + else + error s!"artifact cache service is not configured: {service}" else - error s!"artifact cache service is not configured: {service}" - else - error s!"artifact not found in cache:\n {path}" -where - computeArtifact? descr path : BaseIO (Option Artifact) := do - if let .ok mtime ← getMTime path |>.toBaseIO then - return some {descr, path, mtime} - else if (← path.pathExists) then - return some {descr, path, mtime := 0} - else - return none + error s!"artifact not found in cache:\n {path}" + | .error e => + error s!"failed to retrieve artifact from cache: {e}" def resolveArtifactOutput (out : Json) (service? : Option CacheServiceName) (scope? : Option CacheServiceScope) @@ -676,7 +677,7 @@ public def buildArtifactUnlessUpToDate if let some outputsRef := pkg.outputsRef? then outputsRef.insert inputHash art.descr setTrace art.trace - return art + setMTime art traceFile else let art ← if (← savedTrace.replayIfUpToDate file depTrace) then @@ -684,7 +685,7 @@ public def buildArtifactUnlessUpToDate else doBuild depTrace traceFile setTrace art.trace - return art + setMTime art traceFile where doBuild depTrace traceFile := inline <| buildAction depTrace traceFile do @@ -693,6 +694,14 @@ where clearFileHash file removeFileIfExists traceFile computeArtifact file ext text + setMTime art traceFile := do + match (← getMTime traceFile |>.toBaseIO) with + | .ok mtime => + return {art with mtime} + | .error (.noFileOrDirectory ..) => -- trace file may not exist + return art + | .error e => + error s!"failed to retrieve artifact modification time: {e}" /-- Build `file` using `build` after `dep` completes if the dependency's diff --git a/src/lake/Lake/Build/Module.lean b/src/lake/Lake/Build/Module.lean index 3a2524733e..e8ad7adfd5 100644 --- a/src/lake/Lake/Build/Module.lean +++ b/src/lake/Lake/Build/Module.lean @@ -668,6 +668,49 @@ private def Module.restoreAllArtifacts (mod : Module) (cached : ModuleOutputArti where @[inline] restoreSome file art? := art?.mapM (restoreArtifact file ·) +public protected def Module.checkExists (self : Module) (isModule : Bool) : BaseIO Bool := do + unless (← self.oleanFile.pathExists) do return false + unless (← self.ileanFile.pathExists) do return false + unless (← self.cFile.pathExists) do return false + if Lean.Internal.hasLLVMBackend () then + unless (← self.bcFile.pathExists) do return false + if isModule then + unless (← self.oleanServerFile.pathExists) do return false + unless (← self.oleanPrivateFile.pathExists) do return false + unless (← self.irFile.pathExists) do return false + return true + +@[deprecated Module.checkExists (since := "2025-03-04")] +public instance : CheckExists Module := ⟨Module.checkExists (isModule := false)⟩ + +public protected def Module.getMTime (self : Module) (isModule : Bool) : IO MTime := do + let mut mtime := + (← getMTime self.oleanFile) + |> max (← getMTime self.ileanFile) + |> max (← getMTime self.cFile) + if Lean.Internal.hasLLVMBackend () then + mtime := max mtime (← getMTime self.bcFile) + if isModule then + mtime := mtime + |> max (← getMTime self.oleanServerFile) + |> max (← getMTime self.oleanPrivateFile) + |> max (← getMTime self.irFile) + return mtime + +@[deprecated Module.getMTime (since := "2025-03-04")] +public instance : GetMTime Module := ⟨Module.getMTime (isModule := false)⟩ + +private def ModuleOutputArtifacts.setMTime (self : ModuleOutputArtifacts) (mtime : MTime) : ModuleOutputArtifacts := + {self with + olean := {self.olean with mtime} + oleanServer? := self.oleanServer?.map ({· with mtime}) + oleanPrivate? := self.oleanPrivate?.map ({· with mtime}) + ilean := {self.ilean with mtime} + ir? := self.ir?.map ({· with mtime}) + c := {self.c with mtime} + bc? := self.bc?.map ({· with mtime}) + } + private def Module.mkArtifacts (mod : Module) (srcFile : FilePath) (isModule : Bool) : ModuleArtifacts where lean? := srcFile @@ -746,6 +789,8 @@ private def Module.recBuildLean (mod : Module) : FetchM (Job ModuleOutputArtifac let depTrace ← getTrace let inputHash := depTrace.hash let savedTrace ← readTraceFile mod.traceFile + have : CheckExists Module := ⟨Module.checkExists (isModule := setup.isModule)⟩ + have : GetMTime Module := ⟨Module.getMTime (isModule := setup.isModule)⟩ let fetchArtsFromCache? restoreAll := do let some arts ← getArtifacts? inputHash savedTrace mod.pkg | return none @@ -779,7 +824,13 @@ private def Module.recBuildLean (mod : Module) : FetchM (Job ModuleOutputArtifac mod.buildLean depTrace srcFile setup if let some ref := mod.pkg.outputsRef? then ref.insert inputHash arts.descrs - return arts + match (← getMTime mod.traceFile |>.toBaseIO) with + | .ok mtime => + return arts.setMTime mtime + | .error (.noFileOrDirectory ..) => -- trace file may not exist + return arts + | .error e => + error s!"failed to retrieve module artifact modification time: {e}" /-- The `ModuleFacetConfig` for the builtin `leanArtsFacet`. -/ public def Module.leanArtsFacetConfig : ModuleFacetConfig leanArtsFacet := diff --git a/src/lake/Lake/Config/Artifact.lean b/src/lake/Lake/Config/Artifact.lean index 38dc6a549a..41d6af1a9a 100644 --- a/src/lake/Lake/Config/Artifact.lean +++ b/src/lake/Lake/Config/Artifact.lean @@ -75,7 +75,7 @@ public structure Artifact extends descr : ArtifactDescr where path : FilePath /-- The artifact's. This is used, for example, as a caption in traces. -/ name := path.toString - /-- The artifact's modification time (or `0` if unknown). -/ + /-- The artifact's modification time. -/ mtime : MTime deriving Inhabited, Repr diff --git a/src/lake/Lake/Config/Cache.lean b/src/lake/Lake/Config/Cache.lean index e083cb708c..880782a38f 100644 --- a/src/lake/Lake/Config/Cache.lean +++ b/src/lake/Lake/Config/Cache.lean @@ -351,26 +351,29 @@ namespace Cache cache.artifactDir / artifactPath contentHash ext /-- Returns the artifact in the Lake cache corresponding the given artifact description. -/ +@[deprecated "Deprecated without replacelement." (since := "2025-03-04")] public def getArtifact? (cache : Cache) (descr : ArtifactDescr) : BaseIO (Option Artifact) := do let path := cache.artifactDir / descr.relPath - if let .ok mtime ← getMTime path |>.toBaseIO then - return some {descr, path, mtime} - else if (← path.pathExists) then - return some {descr, path, mtime := 0} - else - return none + let .ok mtime ← getMTime path |>.toBaseIO + | return none + return some {descr, path, mtime} /-- Returns the artifact in the Lake cache corresponding the given artifact description. Errors if missing. -/ +@[deprecated "Deprecated without replacelement." (since := "2025-03-04")] public def getArtifact (cache : Cache) (descr : ArtifactDescr) : EIO String Artifact := do let path := cache.artifactDir / descr.relPath - if let .ok mtime ← getMTime path |>.toBaseIO then + match (← getMTime path |>.toBaseIO) with + | .ok mtime => return {descr, path, mtime} - else if (← path.pathExists) then - return {descr, path, mtime := 0} - else + | .error (.noFileOrDirectory ..) => error s!"artifact not found in cache: {path}" + | .error e => + error s!"failed to retrieve artifact from cache: {e}" -/-- Returns path to the artifact for each output. Errors if any are missing. -/ +/-- +**For internal use only.** +Returns path to the artifact for each output. Errors if any are missing. +-/ public def getArtifactPaths (cache : Cache) (descrs : Array ArtifactDescr) : LogIO (Vector FilePath descrs.size) := throwIfLogs do diff --git a/src/lake/Lake/Config/Module.lean b/src/lake/Lake/Config/Module.lean index 231cfb57bd..a7b115c9d2 100644 --- a/src/lake/Lake/Config/Module.lean +++ b/src/lake/Lake/Config/Module.lean @@ -199,20 +199,3 @@ public def dynlibSuffix := "-1" @[inline] public def nativeFacets (self : Module) (shouldExport : Bool) : Array (ModuleFacet FilePath) := self.lib.nativeFacets shouldExport - -/-! ## Trace Helpers -/ - -public protected def getMTime (self : Module) : IO MTime := do - return mixTrace (mixTrace (← getMTime self.oleanFile) (← getMTime self.ileanFile)) (← getMTime self.cFile) - -public instance : GetMTime Module := ⟨Module.getMTime⟩ - -public protected def checkExists (self : Module) : BaseIO Bool := do - let bcFileExists? ← - if Lean.Internal.hasLLVMBackend () then - checkExists self.bcFile - else - pure true - return (← checkExists self.oleanFile) && (← checkExists self.ileanFile) && (← checkExists self.cFile) && bcFileExists? - -public instance : CheckExists Module := ⟨Module.checkExists⟩ diff --git a/src/lake/Lake/Config/Monad.lean b/src/lake/Lake/Config/Monad.lean index cc834f2a94..49d281c261 100644 --- a/src/lake/Lake/Config/Monad.lean +++ b/src/lake/Lake/Config/Monad.lean @@ -166,24 +166,28 @@ public def getLeanArgs : m (Array String) := @[inline] public def getLakeCache : m Cache := (·.lakeCache) <$> getWorkspace -@[inline, inherit_doc Cache.getArtifact?] +set_option linter.deprecated false in +@[inline, inherit_doc Cache.getArtifact?, +deprecated "Deprecated without replacelement." (since := "2025-03-04")] public def getArtifact? [Bind m] [MonadLiftT BaseIO m] (descr : ArtifactDescr) : m (Option Artifact) := getLakeCache >>= (·.getArtifact? descr) +/-- +Returns whether the package should retrieve its artifacts from the Lake artifact cache. + +If the package has not configured the artifact cache itself through +{lean}`Package.enableArtifactCache?`, this will default to the workspace configuration. +If not configured at all, this defaults to {lean}`true`. +-/ +@[inline] public def Package.isArtifactCacheReadable [MonadWorkspace m] (self : Package) : m Bool := + (self.enableArtifactCache? <|> ·.enableArtifactCache? |>.getD true) <$> getWorkspace + /-- Returns whether the package should store its artifacts in the Lake artifact cache. If the package has not configured the artifact cache itself through {lean}`Package.enableArtifactCache?`, this will default to the workspace configuration. --/ -@[inline] public def Package.isArtifactCacheReadable [MonadWorkspace m] (self : Package) : m Bool := - (self.enableArtifactCache? <|> ·.enableArtifactCache? |>.getD true) <$> getWorkspace - -/-- -Returns whether the package should restore its artifacts from the Lake artifact cache. - -If the package has not configured the artifact cache itself through -{lean}`Package.enableArtifactCache?`, this will default to the workspace configuration. +If not configured at all, this defaults to {lean}`false`. -/ @[inline] public def Package.isArtifactCacheWritable [MonadWorkspace m] (self : Package) : m Bool := (self.enableArtifactCache? <|> ·.enableArtifactCache? |>.getD false) <$> getWorkspace