From ea8fca2d9fa3bc5cd65aaf0b3e4f43e6a4cd81c0 Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Sun, 15 Mar 2026 22:29:44 -0400 Subject: [PATCH] refactor: lake: download arts by default in `cache get` (#12927) This PR changes `lake cache get` to download artifacts by default. Artifacts can be downloaded on demand with the new `--mappings-only` option (`--download-arts` is now obsolete). In the future, the plan is to have Lake download mappings when cloning dependencies. Then, `lake cache get` will primarily be used to download artifacts eagerly. Thus, it makes sense to have that as the default. --- src/lake/Lake/CLI/Help.lean | 9 ++++----- src/lake/Lake/CLI/Main.lean | 14 ++++++++++---- tests/lake/tests/cache/online-test.sh | 16 ++++++++++------ 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/lake/Lake/CLI/Help.lean b/src/lake/Lake/CLI/Help.lean index 105d77844a..af4d2363ab 100644 --- a/src/lake/Lake/CLI/Help.lean +++ b/src/lake/Lake/CLI/Help.lean @@ -377,7 +377,7 @@ OPTIONS: --platform= with Reservoir or --repo, sets the platform --toolchain= with Reservoir or --repo, sets the toolchain --scope= scope for a custom endpoint - --download-arts download artifacts now, not on demand + --mappings-only only download mappings, delay artifacts --force-download redownload existing files Downloads build outputs for packages in the workspace from a remote cache @@ -408,10 +408,9 @@ artifacts. If no mappings are found, Lake will backtrack the Git history up to `--max-revs`, looking for a revision with mappings. If `--max-revs` is 0, Lake will search the repository's entire history (or as far as Git will allow). -With a named service and without a mappings file, Lake will only download -the input-to-output mappings for packages. It will delay downloading of the -corresponding artifacts to the next `lake build` that requires them. Using -`--download-arts` will force Lake to download all artifacts eagerly. +By default, Lake will download both the input-to-output mappings and the +output artifacts for a package. By using `--mappings-onlys`, Lake will only +download the mappings abd delay downloading artifacts until they are needed. If a download for an artifact fails or the download process for a whole package fails, Lake will report this and continue on to the next. Once done, diff --git a/src/lake/Lake/CLI/Main.lean b/src/lake/Lake/CLI/Main.lean index 1a56e9f51e..5cd9c81163 100644 --- a/src/lake/Lake/CLI/Main.lean +++ b/src/lake/Lake/CLI/Main.lean @@ -64,7 +64,7 @@ public structure LakeOptions where offline : Bool := false outputsFile? : Option FilePath := none forceDownload : Bool := false - downloadArts : Bool := false + mappingsOnly : Bool := false service? : Option String := none scope? : Option CacheServiceScope := none platform? : Option CachePlatform := none @@ -249,7 +249,8 @@ def lakeLongOption : (opt : String) → CliM PUnit | "--wfail" => modifyThe LakeOptions ({· with failLv := .warning}) | "--iofail" => modifyThe LakeOptions ({· with failLv := .info}) | "--force-download" => modifyThe LakeOptions ({· with forceDownload := true}) -| "--download-arts" => modifyThe LakeOptions ({· with downloadArts := true}) +| "--download-arts" => modifyThe LakeOptions ({· with mappingsOnly := false}) +| "--mappings-only" => modifyThe LakeOptions ({· with mappingsOnly := true}) | "--service" => do let service ← takeOptArg "--service" "service name" modifyThe LakeOptions ({· with service? := some service}) @@ -409,6 +410,8 @@ protected def get : CliM PUnit := do let ws ← loadWorkspace cfg let cache := ws.lakeCache if let some file := mappings? then liftM (m := LoggerIO) do + if opts.mappingsOnly then + error "`--mappings-only` is not supported with a mappings file; use `lake cache add` instead" if opts.platform?.isSome || opts.toolchain?.isSome then logWarning "the `--platform` and `--toolchain` options do nothing for `cache get` with a mappings file" if opts.failLv ≤ .warning then @@ -441,6 +444,9 @@ protected def get : CliM PUnit := do match ws.lakeEnv.cacheArtifactEndpoint?, ws.lakeEnv.cacheRevisionEndpoint? with | some artifactEndpoint, some revisionEndpoint => logWarning endpointDeprecation + if opts.mappingsOnly then + error "`--mappings-only` requires services to be configured + via the Lake system configuration (not enviroment variables)" return .downloadService artifactEndpoint revisionEndpoint ws.lakeEnv.cacheService? | none, none => return ws.defaultCacheService @@ -469,7 +475,7 @@ protected def get : CliM PUnit := do else findOutputs cache service pkg remoteScope opts platform toolchain cache.writeMap pkg.cacheScope map service.name? (some remoteScope) - if opts.downloadArts || service.name?.isNone then + unless opts.mappingsOnly do let descrs ← map.collectOutputDescrs service.downloadArtifacts descrs cache remoteScope opts.forceDownload else if service.isReservoir then @@ -483,7 +489,7 @@ protected def get : CliM PUnit := do try let map ← findOutputs cache service pkg remoteScope opts platform toolchain cache.writeMap pkg.cacheScope map service.name? (some remoteScope) - if opts.downloadArts || service.name?.isNone then + unless opts.mappingsOnly do let descrs ← map.collectOutputDescrs service.downloadArtifacts descrs cache remoteScope opts.forceDownload return ok diff --git a/tests/lake/tests/cache/online-test.sh b/tests/lake/tests/cache/online-test.sh index ce9d0472ad..61cecb5b29 100755 --- a/tests/lake/tests/cache/online-test.sh +++ b/tests/lake/tests/cache/online-test.sh @@ -58,6 +58,10 @@ echo "# TESTS" test_err "must contain exactly one '/'" cache get --repo='invalid' test_err 'invalid characters in repository name' cache get --repo='!/invalid' +# Test `--mappings-only` validation +test_err '`--mappings-only` is not supported' cache get --mappings-only bogus.jsonl +with_cdn_endpoints test_err '`--mappings-only` requires services' cache get --mappings-only + # Test `--service` validation test_err 'service `bogus` not found in system configuration' \ cache get --service='bogus' @@ -131,21 +135,21 @@ LAKE_CONFIG=services.toml test_run build +Test --no-build -v with_cdn_endpoints test_not_out "downloading" \ cache get .lake/outputs.jsonl --scope='!/test' with_cdn_endpoints test_not_out "downloading artifact" \ - cache get --download-arts --scope='!/test' + cache get --scope='!/test' with_cdn_endpoints test_not_out "downloading" \ - cache get --download-arts --scope='!/test' + cache get --scope='!/test' # Test that the revision cache directory for the package is properly created test_exp -d $LAKE_CACHE_DIR/revisions/test # Test cache get with custom endpoint using `--force-download` with_cdn_endpoints test_out "downloading" \ - cache get --scope='!/test' --force-download --download-arts + cache get --scope='!/test' --force-download test_run build +Test --no-build # Test download service configuration through system configuration LAKE_CONFIG=services.toml test_out "downloading" \ - cache get --scope='!/test' --force-download --download-arts --service=cdn + cache get --scope='!/test' --force-download --service=cdn test_run build +Test --no-build # Test cache put/get with a set platform/toolchain @@ -189,13 +193,13 @@ test_cmd rm -rf .lake/packages/Cli/.lake/build "$LAKE_CACHE_DIR" test_fails -f reservoir.toml build @Cli --no-build test_err "failed to download artifacts for some dependencies" \ -f reservoir2.toml cache get --max-revs=1 -test_run -f reservoir.toml cache get --max-revs=1 +test_run -f reservoir.toml cache get --max-revs=1 --mappings-only test_run -f reservoir.toml build @Cli --no-build # downloads arts # Test Reservoir with `--repo` uses GitHub scope test_cmd rm -rf .lake/packages/Cli/.lake/build "$LAKE_CACHE_DIR" test_run -d .lake/packages/Cli cache get --repo=leanprover/lean4-cli --max-revs=1 -test_run -d .lake/packages/Cli build --no-build # downloads arts +test_run -d .lake/packages/Cli build --no-build # Cleanup rm -rf .git produced.out