fix: lake: ill-formed build output handling (#10586)

This PR makes Lake no longer error if build outputs found in a trace
file (or in the artifact cache) are ill-formed.

This is caused a problem with the CI cache and is just generally too
strict.
This commit is contained in:
Mac Malone 2025-09-26 23:35:57 -04:00 committed by GitHub
parent 05300f7b51
commit 18832eb600
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 65 additions and 49 deletions

View file

@ -430,7 +430,7 @@ public def Cache.saveArtifact
let normalized := contents.crlfToLf
let hash := Hash.ofString normalized
let descr := artifactWithExt hash ext
let path := cache.artifactDir / descr.toFilePath
let path := cache.artifactDir / descr.relPath
createParentDirs path
IO.FS.writeFile path normalized
writeFileHash file hash
@ -440,7 +440,7 @@ public def Cache.saveArtifact
let contents ← IO.FS.readBinFile file
let hash := Hash.ofByteArray contents
let descr := artifactWithExt hash ext
let path := cache.artifactDir / descr.toFilePath
let path := cache.artifactDir / descr.relPath
createParentDirs path
IO.FS.writeBinFile path contents
if exe then
@ -458,7 +458,8 @@ public def cacheArtifact
/-- **For internal use only.** -/
public class ResolveOutputs (m : Type v → Type w) (α : Type v) where
resolveOutputs? (outputs : Json) : m (Option α)
/-- **For internal use only.** -/
resolveOutputs? (outputs : Json) : m (Except String α)
open ResolveOutputs in
/--
@ -474,45 +475,34 @@ in either the saved trace file or in the cached input-to-content mapping.
: JobM (Option α) := do
let updateCache ← pkg.isArtifactCacheEnabled
if let some out ← cache.readOutputs? pkg.cacheScope inputHash then
if let some arts ← resolveOutputs? out then
match (← resolveOutputs? out) with
| .ok arts =>
savedTrace.replayOrFetch traceFile inputHash out
return some arts
else
| .error e =>
logWarning s!"\
input '{inputHash.toString.take 7}' found in package artifact cache, \
but some output(s) were not"
but some output(s) have issues: {e}"
if let .ok data := savedTrace then
if data.depHash == inputHash then
if let some out := data.outputs? then
if let some arts ← resolveOutputs? out then
if let .ok arts ← resolveOutputs? out then
if updateCache then
cache.writeOutputs pkg.cacheScope inputHash out
savedTrace.replayOrFetch traceFile inputHash out
return some arts
return none
@[inline] def resolveArtifactsCore?
[FromJson α] [MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m]
(f : α → m (Option β)) (out : Json)
: m (Option β) := do
match fromJson? out with
| .ok (a : α) => f a
| .error e => error e
@[inline] def resolveArtifact
[MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] (output : Json)
: m (Option Artifact) := resolveArtifactsCore? (do (← getLakeCache).getArtifact? ·) output
@[inline] def resolveOutput?
[MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] (output : Json)
: m (Option Artifact) := do
@[inline] def resolveArtifactOutput?
[MonadWorkspace m] [MonadLiftT BaseIO m] [Monad m] (output : Json)
: m (Except String Artifact) := do
match fromJson? output with
| .ok descr => getArtifact? descr
| .error e => error s!"unexpected output: {e}"
| .ok descr => (← getLakeCache).getArtifact descr |>.toBaseIO
| .error e => return .error s!"ill-formed artifact output `{output}`: {e}"
instance
[MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m]
: ResolveOutputs m Artifact := ⟨resolveOutput?⟩
[MonadWorkspace m] [MonadLiftT BaseIO m] [Monad m]
: ResolveOutputs m Artifact := ⟨resolveArtifactOutput?⟩
/--
Construct an artifact from a path outside the Lake artifact cache.

View file

@ -545,30 +545,32 @@ public def Module.cacheOutputHashes (mod : Module) : IO PUnit := do
if Lean.Internal.hasLLVMBackend () then
cacheFileHash mod.bcFile
private def ModuleOutputDescrs.getArtifactsFrom?
def ModuleOutputDescrs.getArtifactsFrom
(cache : Cache) (descrs : ModuleOutputDescrs)
: BaseIO (Option ModuleOutputArtifacts) := OptionT.run do
let mut arts : ModuleOutputArtifacts := {
olean := ← cache.getArtifact? descrs.olean
ilean := ← cache.getArtifact? descrs.ilean
c :=← cache.getArtifact? descrs.c
: EIO String ModuleOutputArtifacts := do
let arts : ModuleOutputArtifacts := {
olean := ← cache.getArtifact descrs.olean
oleanServer? := ← descrs.oleanServer?.mapM cache.getArtifact
oleanPrivate? := ← descrs.oleanPrivate?.mapM cache.getArtifact
ir? := ← descrs.ir?.mapM cache.getArtifact
ilean := ← cache.getArtifact descrs.ilean
c :=← cache.getArtifact descrs.c
bc? := none
}
if let some hash := descrs.oleanServer? then
arts := {arts with oleanServer? := some (← cache.getArtifact? hash)}
if let some hash := descrs.oleanPrivate? then
arts := {arts with oleanPrivate? := some (← cache.getArtifact? hash)}
if let some hash := descrs.ir? then
arts := {arts with ir? := some (← cache.getArtifact? hash)}
if Lean.Internal.hasLLVMBackend () then
arts := {arts with bc? := some (← cache.getArtifact? (← descrs.bc?))}
return arts
let some descr := descrs.bc?
| error "LLVM backend enabled but module outputs lack bitcode"
return {arts with bc? := some (← cache.getArtifact descr)}
else
return arts
@[inline] def resolveModuleOutputs?
[MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] (outputs : Json)
: m (Option ModuleOutputArtifacts) := do
: m (Except String ModuleOutputArtifacts) := do
match fromJson? outputs with
| .ok (descrs : ModuleOutputDescrs) => descrs.getArtifactsFrom? (← getLakeCache)
| .error e => error e
| .ok (descrs : ModuleOutputDescrs) =>
descrs.getArtifactsFrom (← getLakeCache) |>.toBaseIO
| .error e => return .error s!"ill-formed module outputs: {e}"
instance
[MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m]

View file

@ -40,11 +40,11 @@ attribute [deprecated artifactWithExt (since := "2025-08-30")] ArtifactDescr.mk
namespace ArtifactDescr
/-- Returns the relative file path used for the output's artifact in the Lake cache (c.f. `artifactPath`). -/
@[inline] public def toFilePath (self : ArtifactDescr) : FilePath :=
@[inline] public def relPath (self : ArtifactDescr) : FilePath :=
artifactPath self.hash self.ext
public instance : ToString ArtifactDescr := ⟨(·.toFilePath.toString)⟩
public instance : ToJson ArtifactDescr := ⟨(toJson ·.toFilePath)⟩
public instance : ToString ArtifactDescr := ⟨(·.relPath.toString)⟩
public instance : ToJson ArtifactDescr := ⟨(toJson ·.relPath)⟩
/-- Parse an output's relative file path into an `ArtifactDescr`. -/
public def ofFilePath? (path : FilePath) : Except String ArtifactDescr := do

View file

@ -215,7 +215,7 @@ namespace Cache
/-- Returns the artifact in the Lake cache corresponding the given artifact description. -/
public def getArtifact? (cache : Cache) (descr : ArtifactDescr) : BaseIO (Option Artifact) := do
let path := cache.artifactDir / descr.toFilePath
let path := cache.artifactDir / descr.relPath
if let .ok mtime ← getMTime path |>.toBaseIO then
return some {descr, path, mtime}
else if (← path.pathExists) then
@ -223,12 +223,22 @@ public def getArtifact? (cache : Cache) (descr : ArtifactDescr) : BaseIO (Option
else
return none
/-- Returns the artifact in the Lake cache corresponding the given artifact description. Errors if missing. -/
public def getArtifact (cache : Cache) (descr : ArtifactDescr) : EIO String Artifact := do
let path := cache.artifactDir / descr.relPath
if let .ok mtime ← getMTime path |>.toBaseIO then
return {descr, path, mtime}
else if (← path.pathExists) then
return {descr, path, mtime := 0}
else
error s!"artifact not found in cache: {path}"
/-- Returns path to the artifact for each output. Errors if any are missing. -/
public def getArtifactPaths
(cache : Cache) (descrs : Array ArtifactDescr)
: LogIO (Vector FilePath descrs.size) := throwIfLogs do
(Vector.mk descrs rfl).mapM fun out => do
let art := cache.artifactDir / out.toFilePath
let art := cache.artifactDir / out.relPath
unless (← art.pathExists) do
logError s!"artifact not found in cache: {art}"
return art
@ -325,7 +335,7 @@ public def downloadArtifact
(service : CacheService) (force := false)
: LoggerIO Unit := do
let url := service.artifactUrl descr.hash scope
let path := cache.artifactDir / descr.toFilePath
let path := cache.artifactDir / descr.relPath
if (← path.pathExists) && !force then
return
logInfo s!"\

View file

@ -153,5 +153,19 @@ test_cached +Module:olean.server !
test_cached +Module:olean.private !
test_cached +Module:ir !
# Verify that invalid outputs do not break Lake
if command -v jq > /dev/null; then # skip if no jq found
libPath=$($LAKE query Test:static)
test_cmd rm -f $libPath
inputHash=$(jq -r '.depHash' $libPath.trace)
echo $inputHash
echo bogus > "$CACHE_DIR/outputs/test/$inputHash.json"
test_out 'invalid JSON' build Test:static
test_cmd rm -f $libPath
echo '"bogus"' > "$CACHE_DIR/outputs/test/$inputHash.json"
test_out 'some output(s) have issues' build Test:static
test_exp -f $libPath
fi
# Cleanup
rm -f produced.out Ignored.lean