refactor: lake: introduce GitRev (#13456)

This PR adds a type abbreviation `GitRev` to Lake, which is used for
`String` values that signify Git revisions. Such revisions may be a SHA1
commit hash, a branch name, or one of Git's more complex specifiers.

The PR also adds a number of additional Git primitives which are useful
for #11662.
This commit is contained in:
Mac Malone 2026-04-17 23:44:26 -04:00 committed by GitHub
parent d3c069593b
commit 43e1e8285b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 107 additions and 41 deletions

View file

@ -69,7 +69,7 @@ public structure LakeOptions where
scope? : Option CacheServiceScope := none scope? : Option CacheServiceScope := none
platform? : Option CachePlatform := none platform? : Option CachePlatform := none
toolchain? : Option CacheToolchain := none toolchain? : Option CacheToolchain := none
rev? : Option String := none rev? : Option GitRev := none
maxRevs : Nat := 100 maxRevs : Nat := 100
shake : Shake.Args := {} shake : Shake.Args := {}
@ -563,7 +563,7 @@ private def computePackageRev (pkgDir : FilePath) : CliStateM String := do
repo.getHeadRevision repo.getHeadRevision
private def putCore private def putCore
(rev : String) (outputs : FilePath) (artDir : FilePath) (rev : GitRev) (outputs : FilePath) (artDir : FilePath)
(service : CacheService) (scope : CacheServiceScope) (service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none) (platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: LoggerIO Unit := do : LoggerIO Unit := do

View file

@ -7,6 +7,7 @@ module
prelude prelude
import Init.Control.Do import Init.Control.Do
public import Lake.Util.Git
public import Lake.Util.Log public import Lake.Util.Log
public import Lake.Util.Version public import Lake.Util.Version
public import Lake.Config.Artifact public import Lake.Config.Artifact
@ -469,7 +470,7 @@ public def readOutputs? (cache : Cache) (scope : String) (inputHash : Hash) : Lo
cache.dir / "revisions" cache.dir / "revisions"
/-- Returns path to the input-to-output mappings of a downloaded package revision. -/ /-- Returns path to the input-to-output mappings of a downloaded package revision. -/
@[inline] public def revisionPath (cache : Cache) (scope : String) (rev : String) : FilePath := @[inline] public def revisionPath (cache : Cache) (scope : String) (rev : GitRev) : FilePath :=
cache.revisionDir / scope / s!"{rev}.jsonl" cache.revisionDir / scope / s!"{rev}.jsonl"
end Cache end Cache
@ -942,7 +943,7 @@ public def uploadArtifacts
public def mapContentType : String := "application/vnd.reservoir.outputs+json-lines" public def mapContentType : String := "application/vnd.reservoir.outputs+json-lines"
def s3RevisionUrl def s3RevisionUrl
(rev : String) (service : CacheService) (scope : CacheServiceScope) (rev : GitRev) (service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none) (platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: String := : String :=
match scope.impl with match scope.impl with
@ -956,7 +957,7 @@ def s3RevisionUrl
return s!"{url}/{rev}.jsonl" return s!"{url}/{rev}.jsonl"
public def revisionUrl public def revisionUrl
(rev : String) (service : CacheService) (scope : CacheServiceScope) (rev : GitRev) (service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none) (platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: String := : String :=
if service.isReservoir then Id.run do if service.isReservoir then Id.run do
@ -974,7 +975,7 @@ public def revisionUrl
service.s3RevisionUrl rev scope platform toolchain service.s3RevisionUrl rev scope platform toolchain
public def downloadRevisionOutputs? public def downloadRevisionOutputs?
(rev : String) (cache : Cache) (service : CacheService) (rev : GitRev) (cache : Cache) (service : CacheService)
(localScope : String) (remoteScope : CacheServiceScope) (localScope : String) (remoteScope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none) (force := false) (platform := CachePlatform.none) (toolchain := CacheToolchain.none) (force := false)
: LoggerIO (Option CacheMap) := do : LoggerIO (Option CacheMap) := do
@ -998,7 +999,7 @@ public def downloadRevisionOutputs?
CacheMap.load path platform.isNone CacheMap.load path platform.isNone
public def uploadRevisionOutputs public def uploadRevisionOutputs
(rev : String) (outputs : FilePath) (service : CacheService) (scope : CacheServiceScope) (rev : GitRev) (outputs : FilePath) (service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none) (platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: LoggerIO Unit := do : LoggerIO Unit := do
let url := service.s3RevisionUrl rev scope platform toolchain let url := service.s3RevisionUrl rev scope platform toolchain

View file

@ -9,6 +9,7 @@ prelude
public import Init.Dynamic public import Init.Dynamic
public import Init.System.FilePath public import Init.System.FilePath
public import Lean.Data.NameMap.Basic public import Lean.Data.NameMap.Basic
public import Lake.Util.Git
import Init.Data.ToString.Name import Init.Data.ToString.Name
import Init.Data.ToString.Macro import Init.Data.ToString.Macro
@ -30,7 +31,7 @@ public inductive DependencySrc where
/- A package located at a fixed path relative to the dependent package's directory. -/ /- A package located at a fixed path relative to the dependent package's directory. -/
| path (dir : FilePath) | path (dir : FilePath)
/- A package cloned from a Git repository available at a fixed Git `url`. -/ /- A package cloned from a Git repository available at a fixed Git `url`. -/
| git (url : String) (rev : Option String) (subDir : Option FilePath) | git (url : String) (rev : Option GitRev) (subDir : Option FilePath)
deriving Inhabited, Repr deriving Inhabited, Repr
/-- /--

View file

@ -8,6 +8,7 @@ module
prelude prelude
public import Lake.Util.Version public import Lake.Util.Version
public import Lake.Config.Defaults public import Lake.Config.Defaults
public import Lake.Util.Git
import Lake.Util.Error import Lake.Util.Error
public import Lake.Util.FilePath public import Lake.Util.FilePath
import Lake.Util.JsonObject import Lake.Util.JsonObject
@ -75,8 +76,8 @@ public inductive PackageEntrySrc
/-- A remote Git package. -/ /-- A remote Git package. -/
| git | git
(url : String) (url : String)
(rev : String) (rev : GitRev)
(inputRev? : Option String) (inputRev? : Option GitRev)
(subDir? : Option FilePath) (subDir? : Option FilePath)
deriving Inhabited deriving Inhabited

View file

@ -13,6 +13,8 @@ import Lake.Util.Git
import Lake.Util.IO import Lake.Util.IO
import Lake.Reservoir import Lake.Reservoir
set_option doc.verso true
open System Lean open System Lean
/-! # Dependency Materialization /-! # Dependency Materialization
@ -23,9 +25,12 @@ or resolve a local path dependency.
namespace Lake namespace Lake
/-- Update the Git package in `repo` to `rev` if not already at it. -/ /--
Update the Git package in {lean}`repo` to the revision {lean}`rev?` if not already at it.
IF no revision is specified (i.e., {lean}`rev? = none`), then uses the latest {lit}`master`.
-/
def updateGitPkg def updateGitPkg
(name : String) (repo : GitRepo) (rev? : Option String) (name : String) (repo : GitRepo) (rev? : Option GitRev)
: LoggerIO PUnit := do : LoggerIO PUnit := do
let rev ← repo.findRemoteRevision rev? let rev ← repo.findRemoteRevision rev?
if (← repo.getHeadRevision) = rev then if (← repo.getHeadRevision) = rev then
@ -40,9 +45,9 @@ def updateGitPkg
-- so stale ones from the previous revision cause incorrect trace computations. -- so stale ones from the previous revision cause incorrect trace computations.
repo.clean repo.clean
/-- Clone the Git package as `repo`. -/ /-- Clone the Git package as {lean}`repo`. -/
def cloneGitPkg def cloneGitPkg
(name : String) (repo : GitRepo) (url : String) (rev? : Option String) (name : String) (repo : GitRepo) (url : String) (rev? : Option GitRev)
: LoggerIO PUnit := do : LoggerIO PUnit := do
logInfo s!"{name}: cloning {url}" logInfo s!"{name}: cloning {url}"
repo.clone url repo.clone url
@ -52,9 +57,9 @@ def cloneGitPkg
repo.checkoutDetach rev repo.checkoutDetach rev
/-- /--
Update the Git repository from `url` in `repo` to `rev?`. Update the Git repository from {lean}`url` in {lean}`repo` to {lean}`rev?`.
If `repo` is already from `url`, just checkout the new revision. If {lean}`repo` is already from {lean}`url`, just checkout the new revision.
Otherwise, delete the local repository and clone a fresh copy from `url`. Otherwise, delete the local repository and clone a fresh copy from {lean}`url`.
-/ -/
def updateGitRepo def updateGitRepo
(name : String) (repo : GitRepo) (url : String) (rev? : Option String) (name : String) (repo : GitRepo) (url : String) (rev? : Option String)
@ -75,8 +80,9 @@ def updateGitRepo
IO.FS.removeDirAll repo.dir IO.FS.removeDirAll repo.dir
cloneGitPkg name repo url rev? cloneGitPkg name repo url rev?
/-- /--
Materialize the Git repository from `url` into `repo` at `rev?`. Materialize the Git repository from {lean}`url` into {lean}`repo` at {lean}`rev?`.
Clone it if no local copy exists, otherwise update it. Clone it if no local copy exists, otherwise update it.
-/ -/
def materializeGitRepo def materializeGitRepo
@ -114,11 +120,11 @@ namespace MaterializedDep
@[inline] public def scope (self : MaterializedDep) : String := @[inline] public def scope (self : MaterializedDep) : String :=
self.manifestEntry.scope self.manifestEntry.scope
/-- Path to the dependency's manfiest file (relative to `relPkgDir`). -/ /-- Path to the dependency's manfiest file (relative to {lean}`relPkgDir`). -/
@[inline] public def relManifestFile? (self : MaterializedDep) : Option FilePath := @[inline] public def relManifestFile? (self : MaterializedDep) : Option FilePath :=
self.manifestEntry.manifestFile? self.manifestEntry.manifestFile?
/-- Path to the dependency's manfiest file (relative to `relPkgDir`). -/ /-- Path to the dependency's manfiest file (relative to {lean}`relPkgDir`). -/
@[inline] public def relManifestFile (self : MaterializedDep) : FilePath := @[inline] public def relManifestFile (self : MaterializedDep) : FilePath :=
self.relManifestFile?.getD defaultManifestFile self.relManifestFile?.getD defaultManifestFile
@ -126,7 +132,7 @@ namespace MaterializedDep
@[inline] public def manifestFile (self : MaterializedDep) : FilePath := @[inline] public def manifestFile (self : MaterializedDep) : FilePath :=
self.pkgDir / self.relManifestFile self.pkgDir / self.relManifestFile
/-- Path to the dependency's configuration file (relative to `relPkgDir`). -/ /-- Path to the dependency's configuration file (relative to {lean}`relPkgDir`). -/
@[inline] public def relConfigFile (self : MaterializedDep) : FilePath := @[inline] public def relConfigFile (self : MaterializedDep) : FilePath :=
self.manifestEntry.configFile self.manifestEntry.configFile
@ -143,7 +149,7 @@ end MaterializedDep
inductive InputVer inductive InputVer
| none | none
| git (rev : String) | git (rev : GitRev)
| ver (ver : VerRange) | ver (ver : VerRange)
def pkgNotIndexed (scope name : String) (ver : InputVer) : String := def pkgNotIndexed (scope name : String) (ver : InputVer) : String :=

View file

@ -10,6 +10,7 @@ public import Init.Data.ToString
public import Lake.Util.Proc public import Lake.Util.Proc
import Init.Data.String.TakeDrop import Init.Data.String.TakeDrop
import Init.Data.String.Search import Init.Data.String.Search
import Lake.Util.String
open System open System
namespace Lake namespace Lake
@ -36,18 +37,48 @@ public def filterUrl? (url : String) : Option String :=
some url some url
public def isFullObjectName (rev : String) : Bool := public def isFullObjectName (rev : String) : Bool :=
rev.length == 40 && rev.all fun c => c.isDigit || ('a' <= c && c <= 'f') rev.utf8ByteSize == 40 && isHex rev
end Git end Git
public structure GitRepo where public structure GitRepo where
dir : FilePath dir : FilePath
public instance : Coe FilePath GitRepo := ⟨(⟨·⟩)⟩
public instance : ToString GitRepo := ⟨(·.dir.toString)⟩ /--
A commit-ish [Git revision][1].
This can be SHA1 commit hash, a branch name, or one of Git's more complex specifiers.
[1]: https://git-scm.com/docs/gitrevisions#_specifying_revisions
-/
public abbrev GitRev := String
namespace GitRev
/-- The head revision (i.e., `HEAD`). -/
@[expose] public def head : GitRev := "HEAD"
/-- The revision fetched during the last `git fetch` (i.e., `FETCH_HEAD`). -/
@[expose] public def fetchHead : GitRev := "FETCH_HEAD"
/-- Returns whether this revision is a 40-digit hexadecimal (SHA1) commit hash. -/
public def isFullSha1 (rev : GitRev) : Bool :=
rev.utf8ByteSize == 40 && isHex rev
attribute [deprecated GitRev.isFullSha1 (since := "2026-04-17")] Git.isFullObjectName
/-- Scopes the revision by the remote. -/
@[inline] public def withRemote (remote : String) (rev : GitRev) : GitRev :=
s!"{remote}/{rev}"
end GitRev
namespace GitRepo namespace GitRepo
public instance : Coe FilePath GitRepo := ⟨(⟨·⟩)⟩
public instance : ToString GitRepo := ⟨(·.dir.toString)⟩
public def cwd : GitRepo := ⟨"."⟩ public def cwd : GitRepo := ⟨"."⟩
@[inline] public def dirExists (repo : GitRepo) : BaseIO Bool := @[inline] public def dirExists (repo : GitRepo) : BaseIO Bool :=
@ -71,12 +102,18 @@ public def clone (url : String) (repo : GitRepo) : LogIO PUnit :=
public def quietInit (repo : GitRepo) : LogIO PUnit := public def quietInit (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["init", "-q"] repo.execGit #["init", "-q"]
public def bareInit (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["init", "--bare", "-q"]
public def insideWorkTree (repo : GitRepo) : BaseIO Bool := do public def insideWorkTree (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["rev-parse", "--is-inside-work-tree"] repo.testGit #["rev-parse", "--is-inside-work-tree"]
public def fetch (repo : GitRepo) (remote := Git.defaultRemote) : LogIO PUnit := public def fetch (repo : GitRepo) (remote := Git.defaultRemote) : LogIO PUnit :=
repo.execGit #["fetch", "--tags", "--force", remote] repo.execGit #["fetch", "--tags", "--force", remote]
public def addWorktreeDetach (path : FilePath) (rev : GitRev) (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["worktree", "add", "--detach", path.toString, rev]
public def checkoutBranch (branch : String) (repo : GitRepo) : LogIO PUnit := public def checkoutBranch (branch : String) (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["checkout", "-B", branch] repo.execGit #["checkout", "-B", branch]
@ -87,60 +124,80 @@ public def checkoutDetach (hash : String) (repo : GitRepo) : LogIO PUnit :=
public def clean (repo : GitRepo) : LogIO PUnit := public def clean (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["clean", "-xf"] repo.execGit #["clean", "-xf"]
public def resolveRevision? (rev : String) (repo : GitRepo) : BaseIO (Option String) := do /-- Resolves the revision to a Git object name (SHA1 hash) which or may not exist in the repository. -/
public def resolveRevision? (rev : GitRev) (repo : GitRepo) : BaseIO (Option GitRev) := do
repo.captureGit? #["rev-parse", "--verify", "--end-of-options", rev] repo.captureGit? #["rev-parse", "--verify", "--end-of-options", rev]
public def resolveRevision (rev : String) (repo : GitRepo) : LogIO String := do /-- Resolves the revision to a valid commit hash within the repository. -/
if Git.isFullObjectName rev then return rev public def findCommit? (rev : GitRev) (repo : GitRepo) : BaseIO (Option GitRev) := do
repo.captureGit? #["rev-parse", "--verify", "--end-of-options", rev ++ "^{commit}"]
public def resolveRevision (rev : GitRev) (repo : GitRepo) : LogIO GitRev := do
if rev.isFullSha1 then return rev
if let some rev ← repo.resolveRevision? rev then return rev if let some rev ← repo.resolveRevision? rev then return rev
error s!"{repo}: revision not found '{rev}'" error s!"{repo}: revision not found '{rev}'"
@[inline] public def getHeadRevision? (repo : GitRepo) : BaseIO (Option String) := @[inline] public def getHeadRevision? (repo : GitRepo) : BaseIO (Option GitRev) :=
repo.resolveRevision? "HEAD" repo.resolveRevision? .head
public def getHeadRevision (repo : GitRepo) : LogIO String := do public def getHeadRevision (repo : GitRepo) : LogIO GitRev := do
if let some rev ← repo.getHeadRevision? then return rev if let some rev ← repo.getHeadRevision? then return rev
error s!"{repo}: could not resolve 'HEAD' to a commit; \ error s!"{repo}: could not resolve 'HEAD' to a commit; \
the repository may be corrupt, so you may need to remove it and try again" the repository may be corrupt, so you may need to remove it and try again"
public def getHeadRevisions (repo : GitRepo) (n : Nat := 0) : LogIO (Array String) := do public def fetchRevision? (repo : GitRepo) (remote : String) (rev : GitRev) : LogIO (Option GitRev) := do
if (← repo.testGit #["fetch", "--tags", "--force", "--refetch", "--filter=tree:0", remote, rev]) then
let some rev ← repo.resolveRevision? .fetchHead
| error s!"{repo}: could not resolve 'FETCH_HEAD' to a commit after fetching; \
this may be an issue with Lake; please report it"
return rev
else
return none
public def getHeadRevisions (repo : GitRepo) (n : Nat := 0) : LogIO (Array GitRev) := do
let args := #["rev-list", "HEAD"] let args := #["rev-list", "HEAD"]
let args := if n != 0 then args ++ #["-n", toString n] else args let args := if n != 0 then args ++ #["-n", toString n] else args
let revs ← repo.captureGit args let revs ← repo.captureGit args
return revs.split '\n' |>.toStringArray return revs.split '\n' |>.toStringArray
public def resolveRemoteRevision (rev : String) (remote := Git.defaultRemote) (repo : GitRepo) : LogIO String := do public def resolveRemoteRevision (rev : GitRev) (remote := Git.defaultRemote) (repo : GitRepo) : LogIO GitRev := do
if Git.isFullObjectName rev then return rev if rev.isFullSha1 then return rev
if let some rev ← repo.resolveRevision? s!"{remote}/{rev}" then return rev if let some rev ← repo.resolveRevision? (rev.withRemote remote) then return rev
if let some rev ← repo.resolveRevision? rev then return rev if let some rev ← repo.resolveRevision? rev then return rev
error s!"{repo}: revision not found '{rev}'" error s!"{repo}: revision not found '{rev}'"
public def findRemoteRevision (repo : GitRepo) (rev? : Option String := none) (remote := Git.defaultRemote) : LogIO String := do public def findRemoteRevision (repo : GitRepo) (rev? : Option GitRev := none) (remote := Git.defaultRemote) : LogIO String := do
repo.fetch remote; repo.resolveRemoteRevision (rev?.getD Git.upstreamBranch) remote repo.fetch remote; repo.resolveRemoteRevision (rev?.getD Git.upstreamBranch) remote
public def branchExists (rev : String) (repo : GitRepo) : BaseIO Bool := do public def branchExists (rev : GitRev) (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["show-ref", "--verify", s!"refs/heads/{rev}"] repo.testGit #["show-ref", "--verify", s!"refs/heads/{rev}"]
public def revisionExists (rev : String) (repo : GitRepo) : BaseIO Bool := do public def revisionExists (rev : GitRev) (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["rev-parse", "--verify", rev ++ "^{commit}"] repo.testGit #["rev-parse", "--verify", rev ++ "^{commit}"]
public def getTags (repo : GitRepo) : BaseIO (List String) := do public def getTags (repo : GitRepo) : BaseIO (List String) := do
let some out ← repo.captureGit? #["tag"] | return [] let some out ← repo.captureGit? #["tag"] | return []
return out.split '\n' |>.toStringList return out.split '\n' |>.toStringList
public def findTag? (rev : String := "HEAD") (repo : GitRepo) : BaseIO (Option String) := do public def findTag? (rev : GitRev := .head) (repo : GitRepo) : BaseIO (Option String) := do
repo.captureGit? #["describe", "--tags", "--exact-match", rev] repo.captureGit? #["describe", "--tags", "--exact-match", rev]
public def getRemoteUrl? public def getRemoteUrl?
(remote := Git.defaultRemote) (repo : GitRepo) (remote := Git.defaultRemote) (repo : GitRepo)
: BaseIO (Option String) := do repo.captureGit? #["remote", "get-url", remote] : BaseIO (Option String) := repo.captureGit? #["remote", "get-url", remote]
public def addRemote (remote : String) (url : String) (repo : GitRepo) : LogIO Unit :=
repo.execGit #["remote", "add", remote, url]
public def setRemoteUrl (remote : String) (url : String) (repo : GitRepo) : LogIO Unit :=
repo.execGit #["remote", "set-url", remote, url]
public def getFilteredRemoteUrl? public def getFilteredRemoteUrl?
(remote := Git.defaultRemote) (repo : GitRepo) (remote := Git.defaultRemote) (repo : GitRepo)
: BaseIO (Option String) := OptionT.run do Git.filterUrl? (← repo.getRemoteUrl? remote) : BaseIO (Option String) := OptionT.run do Git.filterUrl? (← repo.getRemoteUrl? remote)
public def hasNoDiff (repo : GitRepo) : BaseIO Bool := do public def hasNoDiff (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["diff", "HEAD", "--exit-code"] repo.testGit #["diff", "--exit-code", "HEAD"]
@[inline] public def hasDiff (repo : GitRepo) : BaseIO Bool := do @[inline] public def hasDiff (repo : GitRepo) : BaseIO Bool := do
not <$> repo.hasNoDiff not <$> repo.hasNoDiff