From 5d8498888bc5dceb5bb867299b26c32ef8e2a5f7 Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Mon, 29 Sep 2025 20:57:45 -0400 Subject: [PATCH] feat: lake: use system cache for bootstrap (#10621) This PR alters the Lake directory detection so that the core build (i.e., `bootstrap = true`) is stored in the user cache directory (if available) and never in a toolchain-specific directory. It is also fixes some issues with cache environment configuration discovered along the way. --- src/lake/Lake/Config/Env.lean | 79 +++++++++++++++++++++-------- src/lake/Lake/Config/Workspace.lean | 8 +-- src/lake/Lake/Load/Resolve.lean | 2 +- src/lake/tests/env/test.sh | 2 + 4 files changed, 65 insertions(+), 26 deletions(-) diff --git a/src/lake/Lake/Config/Env.lean b/src/lake/Lake/Config/Env.lean index 77c41ca4c2..e99a9133bc 100644 --- a/src/lake/Lake/Config/Env.lean +++ b/src/lake/Lake/Config/Env.lean @@ -41,19 +41,28 @@ public structure Env where noCache : Bool /-- Whether the Lake artifact cache should be enabled by default (i.e., `LAKE_ARTIFACT_CACHE`). -/ enableArtifactCache : Bool + /-- Whether the system cache has been disabled (`LAKE_CACHE_DIR` is set but empty). -/ + noSystemCache : Bool := false /-- - The system directory for the Lake cache. Set by `LAKE_CACHE_DIR`. + The directory for the Lake cache. Customized by `LAKE_CACHE_DIR`. + If `none`, no suitable system directory for the cache exists. -/ - lakeCache? : Option Cache + lakeCache? : Option Cache := none + /-- + The directory for the Lake cache. Customized by `LAKE_CACHE_DIR`. + + Unlike `lakeCache?`, this excludes the toolchain directory from consideration. + + If `none`, no suitable system directory for the cache exists. + -/ + lakeSystemCache? : Option Cache := none /-- The authentication key for cache uploads (i.e., `LAKE_CACHE_KEY`). -/ cacheKey? : Option String /-- The base URL for artifact uploads and downloads from the cache (i.e., `LAKE_CACHE_ARTIFACT_ENDPOINT`). -/ cacheArtifactEndpoint? : Option String /-- The base URL for revision uploads and downloads from the cache (i.e., `LAKE_CACHE_REVISION_ENDPOINT`). -/ cacheRevisionEndpoint? : Option String - /-- The initial Elan toolchain of the environment (i.e., `ELAN_TOOLCHAIN`). -/ - initToolchain : String /-- The initial Lean library search path of the environment (i.e., `LEAN_PATH`). -/ initLeanPath : SearchPath /-- The initial Lean source search path of the environment (i.e., `LEAN_SRC_PATH`). -/ @@ -65,7 +74,7 @@ public structure Env where /-- The preferred toolchain of the environment. May be empty. - Either `initToolchain` or, if none, Lake's `Lean.toolchain`. + Either `ELAN_TOOLCHAIN` or, if none, Lake's `Lean.toolchain`. -/ toolchain : String deriving Inhabited @@ -92,26 +101,45 @@ public def getSystemCacheHome? : BaseIO (Option FilePath) := do else return none +def ElanInstall.lakeToolchainCache (elan : ElanInstall) (toolchain : String) : Cache := + ⟨elan.toolchainDir toolchain / "lake" / "cache"⟩ + +@[inline] def ElanInstall.lakeToolchainCache? (elan : ElanInstall) (toolchain : String) : Option Cache := + if toolchain.isEmpty then + none + else + some (elan.lakeToolchainCache toolchain) + namespace Env /-- Compute the Lean toolchain string used by Lake from the process environment. -/ public def computeToolchain : BaseIO String := do return (← IO.getEnv "ELAN_TOOLCHAIN").getD Lean.toolchain +def computeEnvCache? : BaseIO (Option Cache) := OptionT.run do + let cacheDir ← IO.getEnv "LAKE_CACHE_DIR" + guard cacheDir.isEmpty + return ⟨cacheDir⟩ + +def computeSystemCache? : BaseIO (Option Cache) := do + return (← getSystemCacheHome?).map (⟨· / "lake"⟩) + +def computeToolchainCache? (elan? : Option ElanInstall) (toolchain : String) : Option Cache := do + let elan ← elan? + guard !toolchain.isEmpty + return elan.lakeToolchainCache toolchain + /-- Compute the system cache location from the process environment. If `none`, no system directory for workspace the cache exists. -/ public def computeCache? (elan? : Option ElanInstall) (toolchain : String) : BaseIO (Option Cache) := do - if let some cacheDir ← IO.getEnv "LAKE_CACHE_DIR" then - if cacheDir.isEmpty then - return none - else - return some ⟨cacheDir⟩ - else if let some elan := elan? then - return some ⟨elan.toolchainDir toolchain / "lake" / "cache"⟩ - else if let some cacheHome ← getSystemCacheHome? then - return some ⟨cacheHome / "lake"⟩ + if let some cache ← computeEnvCache? then + return some cache + else if let some cache := computeToolchainCache? elan? toolchain then + return some cache + else if let some cache ← computeSystemCache? then + return some cache else return none @@ -124,28 +152,36 @@ public def compute (noCache : Option Bool := none) : EIO String Env := do let reservoirBaseUrl ← getUrlD "RESERVOIR_API_BASE_URL" "https://reservoir.lean-lang.org/api" - let elanToolchain? ← IO.getEnv "ELAN_TOOLCHAIN" - let initToolchain := elanToolchain?.getD "" - let toolchain := elanToolchain?.getD Lean.toolchain - return { + let toolchain ← computeToolchain + addCacheDirs toolchain { lake, lean, elan?, pkgUrlMap := ← computePkgUrlMap reservoirApiUrl := ← getUrlD "RESERVOIR_API_URL" s!"{reservoirBaseUrl}/v1" noCache := (noCache <|> (← IO.getEnv "LAKE_NO_CACHE").bind envToBool?).getD false enableArtifactCache := (← IO.getEnv "LAKE_ARTIFACT_CACHE").bind envToBool? |>.getD false - lakeCache? := ← computeCache? elan? toolchain cacheKey? := (← IO.getEnv "LAKE_CACHE_KEY").map (·.trim) cacheArtifactEndpoint? := (← IO.getEnv "LAKE_CACHE_ARTIFACT_ENDPOINT").map normalizeUrl cacheRevisionEndpoint? := (← IO.getEnv "LAKE_CACHE_REVISION_ENDPOINT").map normalizeUrl githashOverride := (← IO.getEnv "LEAN_GITHASH").getD "" toolchain - initToolchain initLeanPath := ← getSearchPath "LEAN_PATH", initLeanSrcPath := ← getSearchPath "LEAN_SRC_PATH", initSharedLibPath := ← getSearchPath sharedLibPathEnvVar, initPath := ← getSearchPath "PATH" } where + addCacheDirs toolchain env := do + if let some dir ← IO.getEnv "LAKE_CACHE_DIR" then + if dir.isEmpty then + return {env with noSystemCache := true} + else + return {env with lakeCache? := some ⟨dir⟩, lakeSystemCache? := some ⟨dir⟩} + else if let some cache := computeToolchainCache? elan? toolchain then + return {env with lakeCache? := some cache, lakeSystemCache? := ← computeSystemCache?} + else if let some cache ← computeSystemCache? then + return {env with lakeCache? := some cache, lakeSystemCache? := some cache} + else + return env computePkgUrlMap := do let some urlMapStr ← IO.getEnv "LAKE_PKG_URL_MAP" | return {} match Json.parse urlMapStr |>.bind fromJson? with @@ -210,10 +246,11 @@ public def sharedLibPath (env : Env) : SearchPath := env.lean.sharedLibPath ++ env.initSharedLibPath /-- Unset toolchain-specific environment variables. -/ -public def noToolchainVars : Array (String × Option String) := +public def noToolchainVars (env : Env) : Array (String × Option String) := #[ ("ELAN_TOOLCHAIN", none), ("LAKE", none), + ("LAKE_CACHE_DIR", if env.noSystemCache then some "" else env.lakeSystemCache?.map (·.dir.toString)), ("LAKE_OVERRIDE_LEAN", none), ("LAKE_HOME", none), ("LEAN", none), diff --git a/src/lake/Lake/Config/Workspace.lean b/src/lake/Lake/Config/Workspace.lean index 3f25654f5f..66731a33d8 100644 --- a/src/lake/Lake/Config/Workspace.lean +++ b/src/lake/Lake/Config/Workspace.lean @@ -25,6 +25,10 @@ public structure Workspace : Type where root : Package /-- The detected `Lake.Env` of the workspace. -/ lakeEnv : Lake.Env + /-- The Lake cache. -/ + lakeCache : Cache := + if root.bootstrap then lakeEnv.lakeSystemCache?.getD ⟨root.lakeDir / "cache"⟩ + else lakeEnv.lakeCache?.getD ⟨root.lakeDir / "cache"⟩ /-- The CLI arguments Lake was run with. Used by `lake update` to perform a restart of Lake on a toolchain update. @@ -69,10 +73,6 @@ namespace Workspace @[inline] public def lakeDir (self : Workspace) : FilePath := self.root.lakeDir -/-- The Lake cache. -/ -@[inline] public def lakeCache (ws : Workspace) : Cache := - ws.lakeEnv.lakeCache?.getD ⟨ws.lakeDir⟩ - /-- Whether the Lake artifact cache should be enabled by default for packages in the workspace. -/ @[inline] public def enableArtifactCache (ws : Workspace) : Bool := ws.lakeEnv.enableArtifactCache diff --git a/src/lake/Lake/Load/Resolve.lean b/src/lake/Lake/Load/Resolve.lean index 0d0383ce6c..e98f6632a2 100644 --- a/src/lake/Lake/Load/Resolve.lean +++ b/src/lake/Lake/Load/Resolve.lean @@ -256,7 +256,7 @@ def Workspace.updateToolchain let child ← IO.Process.spawn { cmd := elanInstall.elan.toString args := #["run", "--install", tc.toString, "lake"] ++ lakeArgs - env := Env.noToolchainVars + env := ws.lakeEnv.noToolchainVars } IO.Process.exit (← child.wait).toUInt8 else diff --git a/src/lake/tests/env/test.sh b/src/lake/tests/env/test.sh index c6ff16c72b..a23cfc118f 100755 --- a/src/lake/tests/env/test.sh +++ b/src/lake/tests/env/test.sh @@ -36,6 +36,8 @@ LAKE_CACHE_DIR= test_run env -d ../../examples/hello env printenv LAKE_CACHE_DIR # Test that `env` preserves the input environment for certain variables echo "# TEST: Setting variables for lake env" test_eq "foo" env env ELAN_TOOLCHAIN=foo $LAKE env printenv ELAN_TOOLCHAIN +test_out "foo" env env -u LAKE_CACHE_DIR ELAN_HOME=/ ELAN_TOOLCHAIN=foo \ + $LAKE env printenv LAKE_CACHE_DIR LAKE_CACHE_DIR=foo test_eq "foo" env printenv LAKE_CACHE_DIR LEAN_GITHASH=foo test_eq "foo" env printenv LEAN_GITHASH LEAN_AR=foo test_eq "foo" env printenv LEAN_AR