feat: lake: use trace mtime for arts when possible (#12799)

This PR changes Lake to use the modification times of traces (where
available) for artifact modification times.

When artifacts are hard-linked from the cache, they retain the
modification time of the artifact in the cache. Thus, the artifact
modification time is an unreliable metric for determining whether an
artifact is up-to-date relative to other artifacts in the presence of
the cache. The trace file, however, is modified consistently when the
artifacts are updated, making it the most reliable indicator of
modification time.
This commit is contained in:
Mac Malone 2026-03-04 23:53:59 -05:00 committed by GitHub
parent 3cfa2dac42
commit 4384344465
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 123 additions and 73 deletions

View file

@ -227,13 +227,15 @@ Checks if the `info` is up-to-date by comparing `depTrace` with `depHash`.
If old mode is enabled (e.g., `--old`), uses the `oldTrace` modification time
as the point of comparison instead.
-/
@[inline] public def checkHashUpToDate
@[inline, deprecated "Deprecated without replacement." (since := "2025-03-04")]
public def checkHashUpToDate
[CheckExists ι] [GetMTime ι]
(info : ι) (depTrace : BuildTrace) (depHash : Option Hash)
(oldTrace := depTrace.mtime)
: JobM Bool := (·.isUpToDate) <$> checkHashUpToDate' info depTrace depHash oldTrace
/--
**Ror internal use only.**
Checks whether `info` is up-to-date with the trace.
If so, replays the log of the trace if available.
-/
@ -265,7 +267,7 @@ If so, replays the log of the trace if available.
: JobM Bool := (·.isUpToDate) <$> savedTrace.replayIfUpToDate' info depTrace oldTrace
/--
Returns if the saved trace exists and its hash matches `inputHash`.
Returns `true` if the saved trace exists and its hash matches `inputHash`.
If up-to-date, replays the saved log from the trace and sets the current
build action to `replay`. Otherwise, if the log is empty and trace is synthetic,
@ -540,36 +542,35 @@ public def resolveArtifact
: JobM Artifact := do
let ws ← getWorkspace
let path := ws.lakeCache.artifactDir / descr.relPath
if let some art ← computeArtifact? descr path then
return art
else if let some service := service? then
if let some service := ws.findCacheService? service.toString then
let some scope := scope?
| error s!"artifact with associated cache service but no scope"
let url := service.artifactUrl descr.hash scope
logVerbose s!"\
downloaded artifact {descr.hash}\
\n local path: {path}\
\n remote URL: {url}"
liftM <| downloadArtifactCore descr.hash url path
let r := {read := true, write := false, execution := exe}
if let .error e ← IO.setAccessRights path ⟨r, r, r⟩ |>.toBaseIO then
logWarning s!"could not mark downloaded artifact read-only: {e}"
let some art ← computeArtifact? descr path
| error s!"downloaded succeeded, but artifact failed to resolve:\n {path}"
return art
match (← getMTime path |>.toBaseIO) with
| .ok mtime =>
return {descr, path, mtime}
| .error (.noFileOrDirectory ..) =>
-- we redownload artifacts on any error
if let some service := service? then
if let some service := ws.findCacheService? service.toString then
let some scope := scope?
| error s!"artifact with associated cache service but no scope"
let url := service.artifactUrl descr.hash scope
logVerbose s!"\
downloaded artifact {descr.hash}\
\n local path: {path}\
\n remote URL: {url}"
liftM <| downloadArtifactCore descr.hash url path
let r := {read := true, write := false, execution := exe}
if let .error e ← IO.setAccessRights path ⟨r, r, r⟩ |>.toBaseIO then
logWarning s!"could not mark downloaded artifact read-only: {e}"
match (← getMTime path |>.toBaseIO) with
| .ok mtime =>
return {descr, path, mtime}
| .error e =>
error s!"downloaded succeeded, but artifact failed to resolve: {e}"
else
error s!"artifact cache service is not configured: {service}"
else
error s!"artifact cache service is not configured: {service}"
else
error s!"artifact not found in cache:\n {path}"
where
computeArtifact? descr path : BaseIO (Option Artifact) := do
if let .ok mtime ← getMTime path |>.toBaseIO then
return some {descr, path, mtime}
else if (← path.pathExists) then
return some {descr, path, mtime := 0}
else
return none
error s!"artifact not found in cache:\n {path}"
| .error e =>
error s!"failed to retrieve artifact from cache: {e}"
def resolveArtifactOutput
(out : Json) (service? : Option CacheServiceName) (scope? : Option CacheServiceScope)
@ -676,7 +677,7 @@ public def buildArtifactUnlessUpToDate
if let some outputsRef := pkg.outputsRef? then
outputsRef.insert inputHash art.descr
setTrace art.trace
return art
setMTime art traceFile
else
let art ←
if (← savedTrace.replayIfUpToDate file depTrace) then
@ -684,7 +685,7 @@ public def buildArtifactUnlessUpToDate
else
doBuild depTrace traceFile
setTrace art.trace
return art
setMTime art traceFile
where
doBuild depTrace traceFile :=
inline <| buildAction depTrace traceFile do
@ -693,6 +694,14 @@ where
clearFileHash file
removeFileIfExists traceFile
computeArtifact file ext text
setMTime art traceFile := do
match (← getMTime traceFile |>.toBaseIO) with
| .ok mtime =>
return {art with mtime}
| .error (.noFileOrDirectory ..) => -- trace file may not exist
return art
| .error e =>
error s!"failed to retrieve artifact modification time: {e}"
/--
Build `file` using `build` after `dep` completes if the dependency's

View file

@ -668,6 +668,49 @@ private def Module.restoreAllArtifacts (mod : Module) (cached : ModuleOutputArti
where
@[inline] restoreSome file art? := art?.mapM (restoreArtifact file ·)
public protected def Module.checkExists (self : Module) (isModule : Bool) : BaseIO Bool := do
unless (← self.oleanFile.pathExists) do return false
unless (← self.ileanFile.pathExists) do return false
unless (← self.cFile.pathExists) do return false
if Lean.Internal.hasLLVMBackend () then
unless (← self.bcFile.pathExists) do return false
if isModule then
unless (← self.oleanServerFile.pathExists) do return false
unless (← self.oleanPrivateFile.pathExists) do return false
unless (← self.irFile.pathExists) do return false
return true
@[deprecated Module.checkExists (since := "2025-03-04")]
public instance : CheckExists Module := ⟨Module.checkExists (isModule := false)⟩
public protected def Module.getMTime (self : Module) (isModule : Bool) : IO MTime := do
let mut mtime :=
(← getMTime self.oleanFile)
|> max (← getMTime self.ileanFile)
|> max (← getMTime self.cFile)
if Lean.Internal.hasLLVMBackend () then
mtime := max mtime (← getMTime self.bcFile)
if isModule then
mtime := mtime
|> max (← getMTime self.oleanServerFile)
|> max (← getMTime self.oleanPrivateFile)
|> max (← getMTime self.irFile)
return mtime
@[deprecated Module.getMTime (since := "2025-03-04")]
public instance : GetMTime Module := ⟨Module.getMTime (isModule := false)⟩
private def ModuleOutputArtifacts.setMTime (self : ModuleOutputArtifacts) (mtime : MTime) : ModuleOutputArtifacts :=
{self with
olean := {self.olean with mtime}
oleanServer? := self.oleanServer?.map ({· with mtime})
oleanPrivate? := self.oleanPrivate?.map ({· with mtime})
ilean := {self.ilean with mtime}
ir? := self.ir?.map ({· with mtime})
c := {self.c with mtime}
bc? := self.bc?.map ({· with mtime})
}
private def Module.mkArtifacts (mod : Module) (srcFile : FilePath) (isModule : Bool) : ModuleArtifacts where
lean? := srcFile
@ -746,6 +789,8 @@ private def Module.recBuildLean (mod : Module) : FetchM (Job ModuleOutputArtifac
let depTrace ← getTrace
let inputHash := depTrace.hash
let savedTrace ← readTraceFile mod.traceFile
have : CheckExists Module := ⟨Module.checkExists (isModule := setup.isModule)⟩
have : GetMTime Module := ⟨Module.getMTime (isModule := setup.isModule)⟩
let fetchArtsFromCache? restoreAll := do
let some arts ← getArtifacts? inputHash savedTrace mod.pkg
| return none
@ -779,7 +824,13 @@ private def Module.recBuildLean (mod : Module) : FetchM (Job ModuleOutputArtifac
mod.buildLean depTrace srcFile setup
if let some ref := mod.pkg.outputsRef? then
ref.insert inputHash arts.descrs
return arts
match (← getMTime mod.traceFile |>.toBaseIO) with
| .ok mtime =>
return arts.setMTime mtime
| .error (.noFileOrDirectory ..) => -- trace file may not exist
return arts
| .error e =>
error s!"failed to retrieve module artifact modification time: {e}"
/-- The `ModuleFacetConfig` for the builtin `leanArtsFacet`. -/
public def Module.leanArtsFacetConfig : ModuleFacetConfig leanArtsFacet :=

View file

@ -75,7 +75,7 @@ public structure Artifact extends descr : ArtifactDescr where
path : FilePath
/-- The artifact's. This is used, for example, as a caption in traces. -/
name := path.toString
/-- The artifact's modification time (or `0` if unknown). -/
/-- The artifact's modification time. -/
mtime : MTime
deriving Inhabited, Repr

View file

@ -351,26 +351,29 @@ namespace Cache
cache.artifactDir / artifactPath contentHash ext
/-- Returns the artifact in the Lake cache corresponding the given artifact description. -/
@[deprecated "Deprecated without replacelement." (since := "2025-03-04")]
public def getArtifact? (cache : Cache) (descr : ArtifactDescr) : BaseIO (Option Artifact) := do
let path := cache.artifactDir / descr.relPath
if let .ok mtime ← getMTime path |>.toBaseIO then
return some {descr, path, mtime}
else if (← path.pathExists) then
return some {descr, path, mtime := 0}
else
return none
let .ok mtime ← getMTime path |>.toBaseIO
| return none
return some {descr, path, mtime}
/-- Returns the artifact in the Lake cache corresponding the given artifact description. Errors if missing. -/
@[deprecated "Deprecated without replacelement." (since := "2025-03-04")]
public def getArtifact (cache : Cache) (descr : ArtifactDescr) : EIO String Artifact := do
let path := cache.artifactDir / descr.relPath
if let .ok mtime ← getMTime path |>.toBaseIO then
match (← getMTime path |>.toBaseIO) with
| .ok mtime =>
return {descr, path, mtime}
else if (← path.pathExists) then
return {descr, path, mtime := 0}
else
| .error (.noFileOrDirectory ..) =>
error s!"artifact not found in cache: {path}"
| .error e =>
error s!"failed to retrieve artifact from cache: {e}"
/-- Returns path to the artifact for each output. Errors if any are missing. -/
/--
**For internal use only.**
Returns path to the artifact for each output. Errors if any are missing.
-/
public def getArtifactPaths
(cache : Cache) (descrs : Array ArtifactDescr)
: LogIO (Vector FilePath descrs.size) := throwIfLogs do

View file

@ -199,20 +199,3 @@ public def dynlibSuffix := "-1"
@[inline] public def nativeFacets (self : Module) (shouldExport : Bool) : Array (ModuleFacet FilePath) :=
self.lib.nativeFacets shouldExport
/-! ## Trace Helpers -/
public protected def getMTime (self : Module) : IO MTime := do
return mixTrace (mixTrace (← getMTime self.oleanFile) (← getMTime self.ileanFile)) (← getMTime self.cFile)
public instance : GetMTime Module := ⟨Module.getMTime⟩
public protected def checkExists (self : Module) : BaseIO Bool := do
let bcFileExists? ←
if Lean.Internal.hasLLVMBackend () then
checkExists self.bcFile
else
pure true
return (← checkExists self.oleanFile) && (← checkExists self.ileanFile) && (← checkExists self.cFile) && bcFileExists?
public instance : CheckExists Module := ⟨Module.checkExists⟩

View file

@ -166,24 +166,28 @@ public def getLeanArgs : m (Array String) :=
@[inline] public def getLakeCache : m Cache :=
(·.lakeCache) <$> getWorkspace
@[inline, inherit_doc Cache.getArtifact?]
set_option linter.deprecated false in
@[inline, inherit_doc Cache.getArtifact?,
deprecated "Deprecated without replacelement." (since := "2025-03-04")]
public def getArtifact? [Bind m] [MonadLiftT BaseIO m] (descr : ArtifactDescr) : m (Option Artifact) :=
getLakeCache >>= (·.getArtifact? descr)
/--
Returns whether the package should retrieve its artifacts from the Lake artifact cache.
If the package has not configured the artifact cache itself through
{lean}`Package.enableArtifactCache?`, this will default to the workspace configuration.
If not configured at all, this defaults to {lean}`true`.
-/
@[inline] public def Package.isArtifactCacheReadable [MonadWorkspace m] (self : Package) : m Bool :=
(self.enableArtifactCache? <|> ·.enableArtifactCache? |>.getD true) <$> getWorkspace
/--
Returns whether the package should store its artifacts in the Lake artifact cache.
If the package has not configured the artifact cache itself through
{lean}`Package.enableArtifactCache?`, this will default to the workspace configuration.
-/
@[inline] public def Package.isArtifactCacheReadable [MonadWorkspace m] (self : Package) : m Bool :=
(self.enableArtifactCache? <|> ·.enableArtifactCache? |>.getD true) <$> getWorkspace
/--
Returns whether the package should restore its artifacts from the Lake artifact cache.
If the package has not configured the artifact cache itself through
{lean}`Package.enableArtifactCache?`, this will default to the workspace configuration.
If not configured at all, this defaults to {lean}`false`.
-/
@[inline] public def Package.isArtifactCacheWritable [MonadWorkspace m] (self : Package) : m Bool :=
(self.enableArtifactCache? <|> ·.enableArtifactCache? |>.getD false) <$> getWorkspace