From 18832eb6004e1c37f4ad023b4956d6379564a197 Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Fri, 26 Sep 2025 23:35:57 -0400 Subject: [PATCH] fix: lake: ill-formed build output handling (#10586) This PR makes Lake no longer error if build outputs found in a trace file (or in the artifact cache) are ill-formed. This is caused a problem with the CI cache and is just generally too strict. --- src/lake/Lake/Build/Common.lean | 42 ++++++++++++------------------ src/lake/Lake/Build/Module.lean | 36 +++++++++++++------------ src/lake/Lake/Config/Artifact.lean | 6 ++--- src/lake/Lake/Config/Cache.lean | 16 +++++++++--- src/lake/tests/cache/test.sh | 14 ++++++++++ 5 files changed, 65 insertions(+), 49 deletions(-) diff --git a/src/lake/Lake/Build/Common.lean b/src/lake/Lake/Build/Common.lean index 2050c99d41..4f95096de8 100644 --- a/src/lake/Lake/Build/Common.lean +++ b/src/lake/Lake/Build/Common.lean @@ -430,7 +430,7 @@ public def Cache.saveArtifact let normalized := contents.crlfToLf let hash := Hash.ofString normalized let descr := artifactWithExt hash ext - let path := cache.artifactDir / descr.toFilePath + let path := cache.artifactDir / descr.relPath createParentDirs path IO.FS.writeFile path normalized writeFileHash file hash @@ -440,7 +440,7 @@ public def Cache.saveArtifact let contents ← IO.FS.readBinFile file let hash := Hash.ofByteArray contents let descr := artifactWithExt hash ext - let path := cache.artifactDir / descr.toFilePath + let path := cache.artifactDir / descr.relPath createParentDirs path IO.FS.writeBinFile path contents if exe then @@ -458,7 +458,8 @@ public def cacheArtifact /-- **For internal use only.** -/ public class ResolveOutputs (m : Type v → Type w) (α : Type v) where - resolveOutputs? (outputs : Json) : m (Option α) + /-- **For internal use only.** -/ + resolveOutputs? (outputs : Json) : m (Except String α) open ResolveOutputs in /-- @@ -474,45 +475,34 @@ in either the saved trace file or in the cached input-to-content mapping. : JobM (Option α) := do let updateCache ← pkg.isArtifactCacheEnabled if let some out ← cache.readOutputs? pkg.cacheScope inputHash then - if let some arts ← resolveOutputs? out then + match (← resolveOutputs? out) with + | .ok arts => savedTrace.replayOrFetch traceFile inputHash out return some arts - else + | .error e => logWarning s!"\ input '{inputHash.toString.take 7}' found in package artifact cache, \ - but some output(s) were not" + but some output(s) have issues: {e}" if let .ok data := savedTrace then if data.depHash == inputHash then if let some out := data.outputs? then - if let some arts ← resolveOutputs? out then + if let .ok arts ← resolveOutputs? out then if updateCache then cache.writeOutputs pkg.cacheScope inputHash out savedTrace.replayOrFetch traceFile inputHash out return some arts return none -@[inline] def resolveArtifactsCore? - [FromJson α] [MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] - (f : α → m (Option β)) (out : Json) -: m (Option β) := do - match fromJson? out with - | .ok (a : α) => f a - | .error e => error e - -@[inline] def resolveArtifact - [MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] (output : Json) -: m (Option Artifact) := resolveArtifactsCore? (do (← getLakeCache).getArtifact? ·) output - -@[inline] def resolveOutput? - [MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] (output : Json) -: m (Option Artifact) := do +@[inline] def resolveArtifactOutput? + [MonadWorkspace m] [MonadLiftT BaseIO m] [Monad m] (output : Json) +: m (Except String Artifact) := do match fromJson? output with - | .ok descr => getArtifact? descr - | .error e => error s!"unexpected output: {e}" + | .ok descr => (← getLakeCache).getArtifact descr |>.toBaseIO + | .error e => return .error s!"ill-formed artifact output `{output}`: {e}" instance - [MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] -: ResolveOutputs m Artifact := ⟨resolveOutput?⟩ + [MonadWorkspace m] [MonadLiftT BaseIO m] [Monad m] +: ResolveOutputs m Artifact := ⟨resolveArtifactOutput?⟩ /-- Construct an artifact from a path outside the Lake artifact cache. diff --git a/src/lake/Lake/Build/Module.lean b/src/lake/Lake/Build/Module.lean index 08b827cfc0..4a8651992d 100644 --- a/src/lake/Lake/Build/Module.lean +++ b/src/lake/Lake/Build/Module.lean @@ -545,30 +545,32 @@ public def Module.cacheOutputHashes (mod : Module) : IO PUnit := do if Lean.Internal.hasLLVMBackend () then cacheFileHash mod.bcFile -private def ModuleOutputDescrs.getArtifactsFrom? +def ModuleOutputDescrs.getArtifactsFrom (cache : Cache) (descrs : ModuleOutputDescrs) -: BaseIO (Option ModuleOutputArtifacts) := OptionT.run do - let mut arts : ModuleOutputArtifacts := { - olean := ← cache.getArtifact? descrs.olean - ilean := ← cache.getArtifact? descrs.ilean - c :=← cache.getArtifact? descrs.c +: EIO String ModuleOutputArtifacts := do + let arts : ModuleOutputArtifacts := { + olean := ← cache.getArtifact descrs.olean + oleanServer? := ← descrs.oleanServer?.mapM cache.getArtifact + oleanPrivate? := ← descrs.oleanPrivate?.mapM cache.getArtifact + ir? := ← descrs.ir?.mapM cache.getArtifact + ilean := ← cache.getArtifact descrs.ilean + c :=← cache.getArtifact descrs.c + bc? := none } - if let some hash := descrs.oleanServer? then - arts := {arts with oleanServer? := some (← cache.getArtifact? hash)} - if let some hash := descrs.oleanPrivate? then - arts := {arts with oleanPrivate? := some (← cache.getArtifact? hash)} - if let some hash := descrs.ir? then - arts := {arts with ir? := some (← cache.getArtifact? hash)} if Lean.Internal.hasLLVMBackend () then - arts := {arts with bc? := some (← cache.getArtifact? (← descrs.bc?))} - return arts + let some descr := descrs.bc? + | error "LLVM backend enabled but module outputs lack bitcode" + return {arts with bc? := some (← cache.getArtifact descr)} + else + return arts @[inline] def resolveModuleOutputs? [MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] (outputs : Json) -: m (Option ModuleOutputArtifacts) := do +: m (Except String ModuleOutputArtifacts) := do match fromJson? outputs with - | .ok (descrs : ModuleOutputDescrs) => descrs.getArtifactsFrom? (← getLakeCache) - | .error e => error e + | .ok (descrs : ModuleOutputDescrs) => + descrs.getArtifactsFrom (← getLakeCache) |>.toBaseIO + | .error e => return .error s!"ill-formed module outputs: {e}" instance [MonadWorkspace m] [MonadLiftT BaseIO m] [MonadError m] [Monad m] diff --git a/src/lake/Lake/Config/Artifact.lean b/src/lake/Lake/Config/Artifact.lean index fcdc1968ff..691ef16806 100644 --- a/src/lake/Lake/Config/Artifact.lean +++ b/src/lake/Lake/Config/Artifact.lean @@ -40,11 +40,11 @@ attribute [deprecated artifactWithExt (since := "2025-08-30")] ArtifactDescr.mk namespace ArtifactDescr /-- Returns the relative file path used for the output's artifact in the Lake cache (c.f. `artifactPath`). -/ -@[inline] public def toFilePath (self : ArtifactDescr) : FilePath := +@[inline] public def relPath (self : ArtifactDescr) : FilePath := artifactPath self.hash self.ext -public instance : ToString ArtifactDescr := ⟨(·.toFilePath.toString)⟩ -public instance : ToJson ArtifactDescr := ⟨(toJson ·.toFilePath)⟩ +public instance : ToString ArtifactDescr := ⟨(·.relPath.toString)⟩ +public instance : ToJson ArtifactDescr := ⟨(toJson ·.relPath)⟩ /-- Parse an output's relative file path into an `ArtifactDescr`. -/ public def ofFilePath? (path : FilePath) : Except String ArtifactDescr := do diff --git a/src/lake/Lake/Config/Cache.lean b/src/lake/Lake/Config/Cache.lean index 6d73765011..55fdaa1ee9 100644 --- a/src/lake/Lake/Config/Cache.lean +++ b/src/lake/Lake/Config/Cache.lean @@ -215,7 +215,7 @@ namespace Cache /-- Returns the artifact in the Lake cache corresponding the given artifact description. -/ public def getArtifact? (cache : Cache) (descr : ArtifactDescr) : BaseIO (Option Artifact) := do - let path := cache.artifactDir / descr.toFilePath + let path := cache.artifactDir / descr.relPath if let .ok mtime ← getMTime path |>.toBaseIO then return some {descr, path, mtime} else if (← path.pathExists) then @@ -223,12 +223,22 @@ public def getArtifact? (cache : Cache) (descr : ArtifactDescr) : BaseIO (Option else return none +/-- Returns the artifact in the Lake cache corresponding the given artifact description. Errors if missing. -/ +public def getArtifact (cache : Cache) (descr : ArtifactDescr) : EIO String Artifact := do + let path := cache.artifactDir / descr.relPath + if let .ok mtime ← getMTime path |>.toBaseIO then + return {descr, path, mtime} + else if (← path.pathExists) then + return {descr, path, mtime := 0} + else + error s!"artifact not found in cache: {path}" + /-- Returns path to the artifact for each output. Errors if any are missing. -/ public def getArtifactPaths (cache : Cache) (descrs : Array ArtifactDescr) : LogIO (Vector FilePath descrs.size) := throwIfLogs do (Vector.mk descrs rfl).mapM fun out => do - let art := cache.artifactDir / out.toFilePath + let art := cache.artifactDir / out.relPath unless (← art.pathExists) do logError s!"artifact not found in cache: {art}" return art @@ -325,7 +335,7 @@ public def downloadArtifact (service : CacheService) (force := false) : LoggerIO Unit := do let url := service.artifactUrl descr.hash scope - let path := cache.artifactDir / descr.toFilePath + let path := cache.artifactDir / descr.relPath if (← path.pathExists) && !force then return logInfo s!"\ diff --git a/src/lake/tests/cache/test.sh b/src/lake/tests/cache/test.sh index 6ee3cd79ec..6abf27ba75 100755 --- a/src/lake/tests/cache/test.sh +++ b/src/lake/tests/cache/test.sh @@ -153,5 +153,19 @@ test_cached +Module:olean.server ! test_cached +Module:olean.private ! test_cached +Module:ir ! +# Verify that invalid outputs do not break Lake +if command -v jq > /dev/null; then # skip if no jq found + libPath=$($LAKE query Test:static) + test_cmd rm -f $libPath + inputHash=$(jq -r '.depHash' $libPath.trace) + echo $inputHash + echo bogus > "$CACHE_DIR/outputs/test/$inputHash.json" + test_out 'invalid JSON' build Test:static + test_cmd rm -f $libPath + echo '"bogus"' > "$CACHE_DIR/outputs/test/$inputHash.json" + test_out 'some output(s) have issues' build Test:static + test_exp -f $libPath +fi + # Cleanup rm -f produced.out Ignored.lean