diff --git a/src/lake/Lake/CLI/Help.lean b/src/lake/Lake/CLI/Help.lean index af4d2363ab..8c567cade0 100644 --- a/src/lake/Lake/CLI/Help.lean +++ b/src/lake/Lake/CLI/Help.lean @@ -361,6 +361,11 @@ COMMANDS: clean removes ALL froms the local Lake cache services print configured remote cache services +STAGING COMMANDS: + stage copy build outputs from the cache to a directory + unstage cache build outputs from a staging directory + put-staged upload build outputs from a staging directory + See `lake cache help ` for more information on a specific command." def helpCacheGet := @@ -454,7 +459,7 @@ full scope). As such, the command will warn if the work tree currently has changes." def helpCacheAdd := -"Addd input-to-output mappings to the Lake cache +"Add input-to-output mappings to the Lake cache USAGE: lake cache add @@ -476,6 +481,48 @@ to avoid clashes. For Reservoir, this scope can either be a package (set via `--scope`) or a repository (set via `--repo`). For S3 services, both options are synonymous." +def helpCacheStage := +"Copy build outputs from the cache to a staging directory + +USAGE: + lake cache stage + +Creates the staging directory and copies the mappings file to it. Then, it +copies all artifacts described within the mappings file from the cache to the +staging directory. Errors if any of the artifacts described cannot be found in +the cache." + +def helpCacheUnstage := +"Cache build outputs from a staging directory + +USAGE: + lake cache unstage + +Copies the mappings and artifacts stored in staging directory (e.g., via +`lake cache stage`) back into the cache. + +Reads the mappings file located at `outputs.jsonl` within the staging +directory and writes the mappings to the Lake cache. Then, it copies the +described artifacts from the staging directory into the cache." + +def helpCachePutStaged := +"Upload build outputs from a staging directory to a remote service + +USAGE: + lake cache put-staged + +OPTIONS: + --scope= verbatim scope + --repo= scope with repository + toolchain & platform + --toolchain= with --repo, sets the toolchain + --platform= with --repo, sets the platform + +Works like `lake cache put` but uploads outputs from the staging directory +instead of the Lake cache. Does not configure the workspace and thus does not +execute arbitrary user code. However, because of this, the package's platform +and toolchain settings will not be automatically detected and must be +specified manually via `--platform` and `--toolchain` (if desired)." + def helpCacheClean := "Removes ALL files from the local Lake cache @@ -641,6 +688,9 @@ public def helpCache : (cmd : String) → String | "get" => helpCacheGet | "put" => helpCachePut | "add" => helpCacheAdd +| "stage" => helpCacheStage +| "unstage" => helpCacheUnstage +| "putStaged" => helpCachePutStaged | "clean" => helpCacheClean | "services" => helpCacheServices | _ => helpCacheCli diff --git a/src/lake/Lake/CLI/Main.lean b/src/lake/Lake/CLI/Main.lean index a46a9c0a47..d2523fc48a 100644 --- a/src/lake/Lake/CLI/Main.lean +++ b/src/lake/Lake/CLI/Main.lean @@ -523,61 +523,82 @@ where error s!"{remoteScope}: no outputs found {revisions}" return map -protected def put : CliM PUnit := do - processOptions lakeOption - let file ← takeArg "mappings" - let opts ← getThe LakeOptions - let some scope := opts.scope? - | error "the `--scope` or `--repo` option must be set for `cache put`" - noArgsRem do - let cfg ← mkLoadConfig opts - let ws ← loadWorkspace cfg - let pkg := ws.root - let platform := cachePlatform pkg (opts.platform?.getD .system) - let toolchain := cacheToolchain pkg (opts.toolchain?.getD ws.cacheToolchain) - let service : CacheService ← id do - if let some service := opts.service? then - let some service := ws.findCacheService? service - | error (serviceNotFound service ws.lakeConfig.config.cache.services) - let some key := ws.lakeEnv.cacheKey? - | error "uploads require an authentication key configured through `LAKE_CACHE_KEY`" - return service.withKey key - else - match ws.lakeEnv.cacheKey?, ws.lakeEnv.cacheArtifactEndpoint?, ws.lakeEnv.cacheRevisionEndpoint? with - | some key, some artifactEndpoint, some revisionEndpoint => - logWarning endpointDeprecation - return .uploadService key artifactEndpoint revisionEndpoint - | key?, none, none => - if let some service := ws.defaultCacheUploadService? then - let some key := key? - | error "uploads require an authentication key configured through `LAKE_CACHE_KEY`" - return service.withKey key - else - error "no default upload service configured; the `--service` option must be set for `cache put`" - | key?, artifactEndpoint?, revisionEndpoint? => - logWarning endpointDeprecation - error (invalidEndpointConfig key? artifactEndpoint? revisionEndpoint?) - let repo := GitRepo.mk pkg.dir - if (← repo.hasDiff) then - logWarning s!"{pkg.prettyName}: package has changes; \ - artifacts will be uploaded for the most recent commit" - if opts.failLv ≤ .warning then - exit 1 - let rev ← repo.getHeadRevision - let map ← CacheMap.load file - let descrs ← map.collectOutputDescrs - let paths ← ws.lakeCache.getArtifactPaths descrs - service.uploadArtifacts ⟨descrs, rfl⟩ paths scope - -- Mappings are uploaded after artifacts to allow downloads to assume that - -- if the mappings exist, the artifacts should also exist - service.uploadRevisionOutputs rev file scope platform toolchain +private def computeUploadService + (service? : Option String) (lakeEnv : Env) (lakeCfg : LoadedLakeConfig) +: CliStateM CacheService := do + if let some service := service? then + let some service := lakeCfg.cacheServices.find? (.mkSimple service) + | error (serviceNotFound service lakeCfg.config.cache.services) + let some key := lakeEnv.cacheKey? + | error "uploads require an authentication key configured through `LAKE_CACHE_KEY`" + return service.withKey key + else + match lakeEnv.cacheKey?, lakeEnv.cacheArtifactEndpoint?, lakeEnv.cacheRevisionEndpoint? with + | some key, some artifactEndpoint, some revisionEndpoint => + return .uploadService key artifactEndpoint revisionEndpoint + | key?, none, none => + if let some service := lakeCfg.defaultCacheUploadService? then + let some key := key? + | error "uploads require an authentication key configured through `LAKE_CACHE_KEY`" + return service.withKey key + else + error "no default upload service configured; the `--service` option must be set" + | key?, artifactEndpoint?, revisionEndpoint? => + error (invalidEndpointConfig key? artifactEndpoint? revisionEndpoint?) where invalidEndpointConfig key? artifactEndpoint? revisionEndpoint? := s!"invalid endpoint configuration:\ \n LAKE_CACHE_KEY is {if key?.isNone then "unset" else "set"}\ \n LAKE_CACHE_ARTIFACT_ENDPOINT={artifactEndpoint?.getD ""}\ \n LAKE_CACHE_REVISION_ENDPOINT={revisionEndpoint?.getD ""}\n\ - To use `cache put`, these environment variables must be set to non-empty strings." + To upload, these environment variables must be set to non-empty strings." + +private def computePackageRev (pkgDir : FilePath) : CliStateM String := do + let repo := GitRepo.mk pkgDir + if (← repo.hasDiff) then + logWarning s!"package has changes; \ + artifacts will be uploaded for the most recent commit" + if (← getThe LakeOptions).failLv ≤ .warning then + exit 1 + repo.getHeadRevision + +private def putCore + (rev : String) (outputs : FilePath) (artDir : FilePath) + (service : CacheService) (scope : CacheServiceScope) + (platform := CachePlatform.none) (toolchain := CacheToolchain.none) +: LoggerIO Unit := do + let map ← CacheMap.load outputs + let descrs ← map.collectOutputDescrs + let paths ← computeArtifactPaths descrs + service.uploadArtifacts ⟨descrs, rfl⟩ paths scope + -- Mappings are uploaded after artifacts to allow downloads to assume that + -- if the mappings exist, the artifacts should also exist + service.uploadRevisionOutputs rev outputs scope platform toolchain +where + computeArtifactPaths (descrs : Array ArtifactDescr) : LogIO (Vector FilePath descrs.size) := throwIfLogs do + (Vector.mk descrs rfl).mapM fun out => do + let art := artDir / out.relPath + unless (← art.pathExists) do + logError s!"artifact not found in cache: {art}" + return art + +protected def put : CliM PUnit := do + processOptions lakeOption + let file ← takeArg "mappings" + let opts ← getThe LakeOptions + let some scope := opts.scope? + | error "the `--scope` or `--repo` option must be set" + noArgsRem do + let cfg ← mkLoadConfig opts + let lakeEnv := cfg.lakeEnv + let pkg ← loadPackage cfg + let lakeCfg ← loadLakeConfig lakeEnv + let lakeCache := computeLakeCache pkg lakeEnv + let platform := cachePlatform pkg (opts.platform?.getD .system) + let toolchain := cacheToolchain pkg (opts.toolchain?.getD lakeEnv.cacheToolchain) + let service ← computeUploadService opts.service? lakeEnv lakeCfg + let rev ← opts.rev?.getDM (computePackageRev pkg.dir) + putCore rev file lakeCache.artifactDir service scope platform toolchain protected def add : CliM PUnit := do processOptions lakeOption @@ -602,6 +623,99 @@ protected def add : CliM PUnit := do let map ← CacheMap.load file ws.lakeCache.writeMap localScope map service? opts.scope? +private def stagingOutputsFile := "outputs.jsonl" + +protected def stage : CliM PUnit := do + processOptions lakeOption + let opts ← getThe LakeOptions + let mappingsFile ← FilePath.mk <$> takeArg "mappings" + let stagingDir ← FilePath.mk <$> takeArg "staging directory" + noArgsRem do + let cfg ← mkLoadConfig opts + let cache ← id do + if (← configFileExists cfg.configFile) then + return (← loadWorkspaceRoot cfg).lakeCache + else if let some cache := cfg.lakeEnv.lakeCache? then + return cache + else + error "no workspace configuration found and no system cache detected" + let map ← CacheMap.load mappingsFile + let descrs ← map.collectOutputDescrs + IO.FS.createDirAll stagingDir + copyFile mappingsFile (stagingDir / stagingOutputsFile) + let ok ← descrs.foldlM (init := true) fun ok descr => do + let cachePath := cache.artifactDir / descr.relPath + let stagingPath := stagingDir / descr.relPath + match (← copyFile cachePath stagingPath |>.toBaseIO) with + | .ok _ => + return ok + | .error (.noFileOrDirectory ..) => + logError s!"artifact not found in cache: {cachePath}" + return false + | .error e => + logError s!"failed to copy artifact: {e}" + return false + unless ok do + logError "failed to copy all outputs to the staging directory" + exit 1 + +protected def unstage : CliM PUnit := do + processOptions lakeOption + let opts ← getThe LakeOptions + let stagingDir ← FilePath.mk <$> takeArg "staging directory" + let pkg? ← takeArg? + noArgsRem do + let cfg ← mkLoadConfig opts + let ws ← loadWorkspace cfg + let pkg ← match pkg? with + | some pkg => parsePackageSpec ws pkg + | _ => pure ws.root + let localScope := pkg.cacheScope + if opts.scope?.isSome && opts.service?.isNone then + error "`--scope` and `--repo` require `--service`" + let service? ← id do + let some service := opts.service? + | return none + unless (ws.findCacheService? service).isSome do + error (serviceNotFound service ws.lakeConfig.config.cache.services) + return some (.ofString service) + let map ← CacheMap.load (stagingDir / stagingOutputsFile) + let descrs ← map.collectOutputDescrs + let artDir := ws.lakeCache.artifactDir + IO.FS.createDirAll artDir + let ok ← descrs.foldlM (init := true) fun ok descr => do + let cachePath := artDir/ descr.relPath + let stagingPath := stagingDir / descr.relPath + match (← copyFile stagingPath cachePath |>.toBaseIO) with + | .ok _ => + return ok + | .error (.noFileOrDirectory ..) => + logError s!"output artifact not found in staging directory: {stagingPath}" + return false + | .error e => + logError s!"failed to copy artifact: {e}" + return false + unless ok do + logError "failed to copy all outputs to the staging directory" + exit 1 + ws.lakeCache.writeMap localScope map service? opts.scope? + +protected def putStaged : CliM PUnit := do + processOptions lakeOption + let opts ← getThe LakeOptions + let stagingDir ← FilePath.mk <$> takeArg "staging directory" + let some scope := opts.scope? + | error "the `--scope` or `--repo` option must be set" + noArgsRem do + let cfg ← mkLoadConfig opts + let lakeCfg ← loadLakeConfig cfg.lakeEnv + let platform := opts.platform?.getD .none + let toolchain := opts.toolchain?.getD .none + let service ← computeUploadService opts.service? cfg.lakeEnv lakeCfg + let rev ← opts.rev?.getDM (computePackageRev cfg.wsDir) + let outputsFile := stagingDir / stagingOutputsFile + putCore rev outputsFile stagingDir service scope platform toolchain + protected def services : CliM PUnit := do processOptions lakeOption let opts ← getThe LakeOptions @@ -630,13 +744,16 @@ protected def help : CliM PUnit := do end cache def cacheCli : (cmd : String) → CliM PUnit -| "add" => cache.add -| "get" => cache.get -| "put" => cache.put -| "clean" => cache.clean -| "services" => cache.services -| "help" => cache.help -| cmd => throw <| CliError.unknownCommand cmd +| "add" => cache.add +| "get" => cache.get +| "put" => cache.put +| "stage" => cache.stage +| "unstage" => cache.unstage +| "put-staged" => cache.putStaged +| "clean" => cache.clean +| "services" => cache.services +| "help" => cache.help +| cmd => throw <| CliError.unknownCommand cmd /-! ### `lake script` CLI -/ diff --git a/src/lake/Lake/Config/Cache.lean b/src/lake/Lake/Config/Cache.lean index 28a61b65c4..7d34182ee1 100644 --- a/src/lake/Lake/Config/Cache.lean +++ b/src/lake/Lake/Config/Cache.lean @@ -421,19 +421,6 @@ public def getArtifact (cache : Cache) (descr : ArtifactDescr) : EIO String Arti | .error e => error s!"failed to retrieve artifact from cache: {e}" -/-- -**For internal use only.** -Returns path to the artifact for each output. Errors if any are missing. --/ -public def getArtifactPaths - (cache : Cache) (descrs : Array ArtifactDescr) -: LogIO (Vector FilePath descrs.size) := throwIfLogs do - (Vector.mk descrs rfl).mapM fun out => do - let art := cache.artifactDir / out.relPath - unless (← art.pathExists) do - logError s!"artifact not found in cache: {art}" - return art - /-- The directory where input-to-output mappings are stored in the Lake cache. -/ @[inline] public def outputsDir (cache : Cache) : FilePath := cache.dir / "outputs" diff --git a/src/lake/Lake/Config/LakeConfig.lean b/src/lake/Lake/Config/LakeConfig.lean index a2e1d6203c..9ebe30319c 100644 --- a/src/lake/Lake/Config/LakeConfig.lean +++ b/src/lake/Lake/Config/LakeConfig.lean @@ -50,6 +50,6 @@ public abbrev CacheServiceMap := NameMap CacheService public structure LoadedLakeConfig where config : LakeConfig defaultCacheService : CacheService - defaultUploadCacheService? : Option CacheService + defaultCacheUploadService? : Option CacheService cacheServices : CacheServiceMap deriving Nonempty diff --git a/src/lake/Lake/Config/Workspace.lean b/src/lake/Lake/Config/Workspace.lean index e2c6fad2a3..b9c01f1302 100644 --- a/src/lake/Lake/Config/Workspace.lean +++ b/src/lake/Lake/Config/Workspace.lean @@ -22,6 +22,13 @@ open Lean (Name LeanOptions) namespace Lake +/-- **For internal use only.** Computes the cache to use for the package based on the environment. -/ +public def computeLakeCache (pkg : Package) (lakeEnv : Lake.Env) : Cache := + if pkg.bootstrap then + lakeEnv.lakeSystemCache?.getD ⟨pkg.lakeDir / "cache"⟩ + else + lakeEnv.lakeCache?.getD ⟨pkg.lakeDir / "cache"⟩ + /-- A Lake workspace -- the top-level package directory. -/ public structure Workspace : Type where /-- The root package of the workspace. -/ @@ -31,9 +38,7 @@ public structure Workspace : Type where /-- The Lake configuration from the system configuration file. -/ lakeConfig : LoadedLakeConfig /-- The Lake cache. -/ - lakeCache : Cache := - if root.bootstrap then lakeEnv.lakeSystemCache?.getD ⟨root.lakeDir / "cache"⟩ - else lakeEnv.lakeCache?.getD ⟨root.lakeDir / "cache"⟩ + lakeCache : Cache := private_decl% computeLakeCache root lakeEnv /-- The CLI arguments Lake was run with. Used by {lit}`lake update` to perform a restart of Lake on a toolchain update. @@ -128,7 +133,7 @@ Returns the cache service (if any) used by default for uploads (e.g., for {lit}` This is configured through {lit}`cache.defaultUploadService` in the system Lake configuration. -/ @[inline] public def defaultCacheUploadService? (ws : Workspace) : Option CacheService := - ws.lakeConfig.defaultUploadCacheService? + ws.lakeConfig.defaultCacheUploadService? /-- Returns the configured cache service with the given name. diff --git a/src/lake/Lake/Load/Toml.lean b/src/lake/Lake/Load/Toml.lean index 6edff0df01..b9893aa31a 100644 --- a/src/lake/Lake/Load/Toml.lean +++ b/src/lake/Lake/Load/Toml.lean @@ -545,7 +545,7 @@ private def loadLakeConfigCore (path : FilePath) (lakeEnv : Lake.Env) : LogIO Lo | error s!"the configured default cache service `{name}` is not defined; \ please add a `cache.service` with that name" return service - let defaultUploadCacheService? ← id do + let defaultCacheUploadService? ← id do let name := config.cache.defaultUploadService if name.isEmpty then return none @@ -555,12 +555,12 @@ private def loadLakeConfigCore (path : FilePath) (lakeEnv : Lake.Env) : LogIO Lo please add a `cache.service` with that name" return some service if cacheServices.contains `reservoir then - return {config, defaultCacheService, defaultUploadCacheService?, cacheServices} + return {config, defaultCacheService, defaultCacheUploadService?, cacheServices} else let cacheServices := cacheServices.insert `reservoir defaultService let defaultServiceConfig := {name := "reservoir", kind := .reservoir, apiEndpoint := lakeEnv.reservoirApiUrl} let config := {config with cache.services := config.cache.services.push defaultServiceConfig} - return {config, defaultCacheService, defaultUploadCacheService?, cacheServices} + return {config, defaultCacheService, defaultCacheUploadService?, cacheServices} else errorWithLog <| errs.forM fun {ref, msg} => let pos := ictx.fileMap.toPosition <| ref.getPos?.getD 0 @@ -574,7 +574,7 @@ private def LoadedLakeConfig.mkDefault (lakeEnv : Lake.Env) : LoadedLakeConfig : { config.cache.services := #[defaultServiceConfig] defaultCacheService := defaultService - defaultUploadCacheService? := none + defaultCacheUploadService? := none cacheServices := NameMap.empty.insert `reservoir defaultService } diff --git a/tests/lake/tests/cache/online-test.sh b/tests/lake/tests/cache/online-test.sh index 61cecb5b29..9439996626 100755 --- a/tests/lake/tests/cache/online-test.sh +++ b/tests/lake/tests/cache/online-test.sh @@ -80,15 +80,27 @@ LAKE_CACHE_REVISION_ENDPOINT=bogus test_err 'both environment variables must be # Test `cache put` command errors for bad configurations with_upload_endpoints test_err 'the `--scope` or `--repo` option must be set' cache put bogus.jsonl -test_err 'the `--service` option must be set for `cache put`' \ +test_err 'the `--service` option must be set' \ cache put bogus.jsonl --scope='bogus' -LAKE_CACHE_KEY= test_err 'the `--service` option must be set for `cache put`' \ +LAKE_CACHE_KEY= test_err 'the `--service` option must be set' \ cache put bogus.jsonl --scope='bogus' LAKE_CACHE_REVISION_ENDPOINT=bogus test_err 'these environment variables must be set' \ cache put bogus.jsonl --scope='bogus' LAKE_CACHE_REVISION_ENDPOINT=bogus test_err 'these environment variables must be set' \ cache put bogus.jsonl --scope='bogus' +# Test `cache put-staged` command errors for bad configurations +with_upload_endpoints test_err 'the `--scope` or `--repo` option must be set' \ + cache put-staged bogus +test_err 'the `--service` option must be set' \ + cache put-staged bogus --scope='bogus' +LAKE_CACHE_KEY= test_err 'the `--service` option must be set' \ + cache put-staged bogus --scope='bogus' +LAKE_CACHE_REVISION_ENDPOINT=bogus test_err 'these environment variables must be set' \ + cache put-staged bogus --scope='bogus' +LAKE_CACHE_REVISION_ENDPOINT=bogus test_err 'these environment variables must be set' \ + cache put-staged bogus --scope='bogus' + # Test `cache add` command errors for bad configurations test_err '`--scope` and `--repo` require `--service`' \ cache add bogus.jsonl --scope='bogus' @@ -120,6 +132,12 @@ LAKE_CONFIG=services.toml test_err "failed to upload artifact" \ # Test cache put with a custom endpoint with_upload_endpoints test_run cache put .lake/outputs.jsonl --scope='!/test' + +# Test cache put-staged with a custom endpoint +test_run cache stage .lake/outputs.jsonl .lake/staging +with_upload_endpoints test_run cache put-staged .lake/staging --scope='!/test' + +# Remove local artifacts test_cmd rm -rf .lake/build "$LAKE_CACHE_DIR" # Test download failure with a bogus scope diff --git a/tests/lake/tests/cache/test.sh b/tests/lake/tests/cache/test.sh index 640aaf6aaf..4c0134409f 100755 --- a/tests/lake/tests/cache/test.sh +++ b/tests/lake/tests/cache/test.sh @@ -201,6 +201,33 @@ test_lines 6 .lake/outputs.jsonl test_run -f platformIndependent.toml build Test:static -o .lake/outputs.jsonl test_lines 3 .lake/outputs.jsonl +# Test `lake cache stage` and `lake cache unstage` +test_run build Test:static -o .lake/outputs.jsonl +test_lines 6 .lake/outputs.jsonl + +# Verify `cache stage` copies artifacts to the staging directory +test_run cache stage .lake/outputs.jsonl .lake/staging +test_exp -d .lake/staging +test_exp -f .lake/staging/outputs.jsonl +test_cmd cmp -s .lake/outputs.jsonl .lake/staging/outputs.jsonl + +# Verify `cache unstage` restores artifacts to the cache +test_cmd rm -rf "$CACHE_DIR/artifacts" +test_exp ! -d "$CACHE_DIR/artifacts" +test_run cache unstage .lake/staging +test_exp -d "$CACHE_DIR/artifacts" +test_run build Test:static --no-build + +# Verify that `cache stage` fails if cached artifacts are missing +test_cmd rm -rf "$CACHE_DIR/artifacts" +test_err 'artifact not found in cache' cache stage .lake/outputs.jsonl .lake/staging-fail +test_run cache unstage .lake/staging + +# Verify that `cache unstage` fails if staging artifacts are missing +test_cmd mkdir -p .lake/staging-empty +test_cmd cp .lake/outputs.jsonl .lake/staging-empty/outputs.jsonl +test_err 'artifact not found in staging directory' cache unstage .lake/staging-empty + # Verify that `lake cache clean` deletes the cache directory test_exp -d "$CACHE_DIR" test_cmd cp -r "$CACHE_DIR" .lake/cache-backup