refactor: lake: introduce GitRev (#13456)

This PR adds a type abbreviation `GitRev` to Lake, which is used for
`String` values that signify Git revisions. Such revisions may be a SHA1
commit hash, a branch name, or one of Git's more complex specifiers.

The PR also adds a number of additional Git primitives which are useful
for #11662.
This commit is contained in:
Mac Malone 2026-04-17 23:44:26 -04:00 committed by GitHub
parent d3c069593b
commit 43e1e8285b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 107 additions and 41 deletions

View file

@ -69,7 +69,7 @@ public structure LakeOptions where
scope? : Option CacheServiceScope := none
platform? : Option CachePlatform := none
toolchain? : Option CacheToolchain := none
rev? : Option String := none
rev? : Option GitRev := none
maxRevs : Nat := 100
shake : Shake.Args := {}
@ -563,7 +563,7 @@ private def computePackageRev (pkgDir : FilePath) : CliStateM String := do
repo.getHeadRevision
private def putCore
(rev : String) (outputs : FilePath) (artDir : FilePath)
(rev : GitRev) (outputs : FilePath) (artDir : FilePath)
(service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: LoggerIO Unit := do

View file

@ -7,6 +7,7 @@ module
prelude
import Init.Control.Do
public import Lake.Util.Git
public import Lake.Util.Log
public import Lake.Util.Version
public import Lake.Config.Artifact
@ -469,7 +470,7 @@ public def readOutputs? (cache : Cache) (scope : String) (inputHash : Hash) : Lo
cache.dir / "revisions"
/-- Returns path to the input-to-output mappings of a downloaded package revision. -/
@[inline] public def revisionPath (cache : Cache) (scope : String) (rev : String) : FilePath :=
@[inline] public def revisionPath (cache : Cache) (scope : String) (rev : GitRev) : FilePath :=
cache.revisionDir / scope / s!"{rev}.jsonl"
end Cache
@ -942,7 +943,7 @@ public def uploadArtifacts
public def mapContentType : String := "application/vnd.reservoir.outputs+json-lines"
def s3RevisionUrl
(rev : String) (service : CacheService) (scope : CacheServiceScope)
(rev : GitRev) (service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: String :=
match scope.impl with
@ -956,7 +957,7 @@ def s3RevisionUrl
return s!"{url}/{rev}.jsonl"
public def revisionUrl
(rev : String) (service : CacheService) (scope : CacheServiceScope)
(rev : GitRev) (service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: String :=
if service.isReservoir then Id.run do
@ -974,7 +975,7 @@ public def revisionUrl
service.s3RevisionUrl rev scope platform toolchain
public def downloadRevisionOutputs?
(rev : String) (cache : Cache) (service : CacheService)
(rev : GitRev) (cache : Cache) (service : CacheService)
(localScope : String) (remoteScope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none) (force := false)
: LoggerIO (Option CacheMap) := do
@ -998,7 +999,7 @@ public def downloadRevisionOutputs?
CacheMap.load path platform.isNone
public def uploadRevisionOutputs
(rev : String) (outputs : FilePath) (service : CacheService) (scope : CacheServiceScope)
(rev : GitRev) (outputs : FilePath) (service : CacheService) (scope : CacheServiceScope)
(platform := CachePlatform.none) (toolchain := CacheToolchain.none)
: LoggerIO Unit := do
let url := service.s3RevisionUrl rev scope platform toolchain

View file

@ -9,6 +9,7 @@ prelude
public import Init.Dynamic
public import Init.System.FilePath
public import Lean.Data.NameMap.Basic
public import Lake.Util.Git
import Init.Data.ToString.Name
import Init.Data.ToString.Macro
@ -30,7 +31,7 @@ public inductive DependencySrc where
/- A package located at a fixed path relative to the dependent package's directory. -/
| path (dir : FilePath)
/- A package cloned from a Git repository available at a fixed Git `url`. -/
| git (url : String) (rev : Option String) (subDir : Option FilePath)
| git (url : String) (rev : Option GitRev) (subDir : Option FilePath)
deriving Inhabited, Repr
/--

View file

@ -8,6 +8,7 @@ module
prelude
public import Lake.Util.Version
public import Lake.Config.Defaults
public import Lake.Util.Git
import Lake.Util.Error
public import Lake.Util.FilePath
import Lake.Util.JsonObject
@ -75,8 +76,8 @@ public inductive PackageEntrySrc
/-- A remote Git package. -/
| git
(url : String)
(rev : String)
(inputRev? : Option String)
(rev : GitRev)
(inputRev? : Option GitRev)
(subDir? : Option FilePath)
deriving Inhabited

View file

@ -13,6 +13,8 @@ import Lake.Util.Git
import Lake.Util.IO
import Lake.Reservoir
set_option doc.verso true
open System Lean
/-! # Dependency Materialization
@ -23,9 +25,12 @@ or resolve a local path dependency.
namespace Lake
/-- Update the Git package in `repo` to `rev` if not already at it. -/
/--
Update the Git package in {lean}`repo` to the revision {lean}`rev?` if not already at it.
IF no revision is specified (i.e., {lean}`rev? = none`), then uses the latest {lit}`master`.
-/
def updateGitPkg
(name : String) (repo : GitRepo) (rev? : Option String)
(name : String) (repo : GitRepo) (rev? : Option GitRev)
: LoggerIO PUnit := do
let rev ← repo.findRemoteRevision rev?
if (← repo.getHeadRevision) = rev then
@ -40,9 +45,9 @@ def updateGitPkg
-- so stale ones from the previous revision cause incorrect trace computations.
repo.clean
/-- Clone the Git package as `repo`. -/
/-- Clone the Git package as {lean}`repo`. -/
def cloneGitPkg
(name : String) (repo : GitRepo) (url : String) (rev? : Option String)
(name : String) (repo : GitRepo) (url : String) (rev? : Option GitRev)
: LoggerIO PUnit := do
logInfo s!"{name}: cloning {url}"
repo.clone url
@ -52,9 +57,9 @@ def cloneGitPkg
repo.checkoutDetach rev
/--
Update the Git repository from `url` in `repo` to `rev?`.
If `repo` is already from `url`, just checkout the new revision.
Otherwise, delete the local repository and clone a fresh copy from `url`.
Update the Git repository from {lean}`url` in {lean}`repo` to {lean}`rev?`.
If {lean}`repo` is already from {lean}`url`, just checkout the new revision.
Otherwise, delete the local repository and clone a fresh copy from {lean}`url`.
-/
def updateGitRepo
(name : String) (repo : GitRepo) (url : String) (rev? : Option String)
@ -75,8 +80,9 @@ def updateGitRepo
IO.FS.removeDirAll repo.dir
cloneGitPkg name repo url rev?
/--
Materialize the Git repository from `url` into `repo` at `rev?`.
Materialize the Git repository from {lean}`url` into {lean}`repo` at {lean}`rev?`.
Clone it if no local copy exists, otherwise update it.
-/
def materializeGitRepo
@ -114,11 +120,11 @@ namespace MaterializedDep
@[inline] public def scope (self : MaterializedDep) : String :=
self.manifestEntry.scope
/-- Path to the dependency's manfiest file (relative to `relPkgDir`). -/
/-- Path to the dependency's manfiest file (relative to {lean}`relPkgDir`). -/
@[inline] public def relManifestFile? (self : MaterializedDep) : Option FilePath :=
self.manifestEntry.manifestFile?
/-- Path to the dependency's manfiest file (relative to `relPkgDir`). -/
/-- Path to the dependency's manfiest file (relative to {lean}`relPkgDir`). -/
@[inline] public def relManifestFile (self : MaterializedDep) : FilePath :=
self.relManifestFile?.getD defaultManifestFile
@ -126,7 +132,7 @@ namespace MaterializedDep
@[inline] public def manifestFile (self : MaterializedDep) : FilePath :=
self.pkgDir / self.relManifestFile
/-- Path to the dependency's configuration file (relative to `relPkgDir`). -/
/-- Path to the dependency's configuration file (relative to {lean}`relPkgDir`). -/
@[inline] public def relConfigFile (self : MaterializedDep) : FilePath :=
self.manifestEntry.configFile
@ -143,7 +149,7 @@ end MaterializedDep
inductive InputVer
| none
| git (rev : String)
| git (rev : GitRev)
| ver (ver : VerRange)
def pkgNotIndexed (scope name : String) (ver : InputVer) : String :=

View file

@ -10,6 +10,7 @@ public import Init.Data.ToString
public import Lake.Util.Proc
import Init.Data.String.TakeDrop
import Init.Data.String.Search
import Lake.Util.String
open System
namespace Lake
@ -36,18 +37,48 @@ public def filterUrl? (url : String) : Option String :=
some url
public def isFullObjectName (rev : String) : Bool :=
rev.length == 40 && rev.all fun c => c.isDigit || ('a' <= c && c <= 'f')
rev.utf8ByteSize == 40 && isHex rev
end Git
public structure GitRepo where
dir : FilePath
public instance : Coe FilePath GitRepo := ⟨(⟨·⟩)⟩
public instance : ToString GitRepo := ⟨(·.dir.toString)⟩
/--
A commit-ish [Git revision][1].
This can be SHA1 commit hash, a branch name, or one of Git's more complex specifiers.
[1]: https://git-scm.com/docs/gitrevisions#_specifying_revisions
-/
public abbrev GitRev := String
namespace GitRev
/-- The head revision (i.e., `HEAD`). -/
@[expose] public def head : GitRev := "HEAD"
/-- The revision fetched during the last `git fetch` (i.e., `FETCH_HEAD`). -/
@[expose] public def fetchHead : GitRev := "FETCH_HEAD"
/-- Returns whether this revision is a 40-digit hexadecimal (SHA1) commit hash. -/
public def isFullSha1 (rev : GitRev) : Bool :=
rev.utf8ByteSize == 40 && isHex rev
attribute [deprecated GitRev.isFullSha1 (since := "2026-04-17")] Git.isFullObjectName
/-- Scopes the revision by the remote. -/
@[inline] public def withRemote (remote : String) (rev : GitRev) : GitRev :=
s!"{remote}/{rev}"
end GitRev
namespace GitRepo
public instance : Coe FilePath GitRepo := ⟨(⟨·⟩)⟩
public instance : ToString GitRepo := ⟨(·.dir.toString)⟩
public def cwd : GitRepo := ⟨"."⟩
@[inline] public def dirExists (repo : GitRepo) : BaseIO Bool :=
@ -71,12 +102,18 @@ public def clone (url : String) (repo : GitRepo) : LogIO PUnit :=
public def quietInit (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["init", "-q"]
public def bareInit (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["init", "--bare", "-q"]
public def insideWorkTree (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["rev-parse", "--is-inside-work-tree"]
public def fetch (repo : GitRepo) (remote := Git.defaultRemote) : LogIO PUnit :=
repo.execGit #["fetch", "--tags", "--force", remote]
public def addWorktreeDetach (path : FilePath) (rev : GitRev) (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["worktree", "add", "--detach", path.toString, rev]
public def checkoutBranch (branch : String) (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["checkout", "-B", branch]
@ -87,60 +124,80 @@ public def checkoutDetach (hash : String) (repo : GitRepo) : LogIO PUnit :=
public def clean (repo : GitRepo) : LogIO PUnit :=
repo.execGit #["clean", "-xf"]
public def resolveRevision? (rev : String) (repo : GitRepo) : BaseIO (Option String) := do
/-- Resolves the revision to a Git object name (SHA1 hash) which or may not exist in the repository. -/
public def resolveRevision? (rev : GitRev) (repo : GitRepo) : BaseIO (Option GitRev) := do
repo.captureGit? #["rev-parse", "--verify", "--end-of-options", rev]
public def resolveRevision (rev : String) (repo : GitRepo) : LogIO String := do
if Git.isFullObjectName rev then return rev
/-- Resolves the revision to a valid commit hash within the repository. -/
public def findCommit? (rev : GitRev) (repo : GitRepo) : BaseIO (Option GitRev) := do
repo.captureGit? #["rev-parse", "--verify", "--end-of-options", rev ++ "^{commit}"]
public def resolveRevision (rev : GitRev) (repo : GitRepo) : LogIO GitRev := do
if rev.isFullSha1 then return rev
if let some rev ← repo.resolveRevision? rev then return rev
error s!"{repo}: revision not found '{rev}'"
@[inline] public def getHeadRevision? (repo : GitRepo) : BaseIO (Option String) :=
repo.resolveRevision? "HEAD"
@[inline] public def getHeadRevision? (repo : GitRepo) : BaseIO (Option GitRev) :=
repo.resolveRevision? .head
public def getHeadRevision (repo : GitRepo) : LogIO String := do
public def getHeadRevision (repo : GitRepo) : LogIO GitRev := do
if let some rev ← repo.getHeadRevision? then return rev
error s!"{repo}: could not resolve 'HEAD' to a commit; \
the repository may be corrupt, so you may need to remove it and try again"
public def getHeadRevisions (repo : GitRepo) (n : Nat := 0) : LogIO (Array String) := do
public def fetchRevision? (repo : GitRepo) (remote : String) (rev : GitRev) : LogIO (Option GitRev) := do
if (← repo.testGit #["fetch", "--tags", "--force", "--refetch", "--filter=tree:0", remote, rev]) then
let some rev ← repo.resolveRevision? .fetchHead
| error s!"{repo}: could not resolve 'FETCH_HEAD' to a commit after fetching; \
this may be an issue with Lake; please report it"
return rev
else
return none
public def getHeadRevisions (repo : GitRepo) (n : Nat := 0) : LogIO (Array GitRev) := do
let args := #["rev-list", "HEAD"]
let args := if n != 0 then args ++ #["-n", toString n] else args
let revs ← repo.captureGit args
return revs.split '\n' |>.toStringArray
public def resolveRemoteRevision (rev : String) (remote := Git.defaultRemote) (repo : GitRepo) : LogIO String := do
if Git.isFullObjectName rev then return rev
if let some rev ← repo.resolveRevision? s!"{remote}/{rev}" then return rev
public def resolveRemoteRevision (rev : GitRev) (remote := Git.defaultRemote) (repo : GitRepo) : LogIO GitRev := do
if rev.isFullSha1 then return rev
if let some rev ← repo.resolveRevision? (rev.withRemote remote) then return rev
if let some rev ← repo.resolveRevision? rev then return rev
error s!"{repo}: revision not found '{rev}'"
public def findRemoteRevision (repo : GitRepo) (rev? : Option String := none) (remote := Git.defaultRemote) : LogIO String := do
public def findRemoteRevision (repo : GitRepo) (rev? : Option GitRev := none) (remote := Git.defaultRemote) : LogIO String := do
repo.fetch remote; repo.resolveRemoteRevision (rev?.getD Git.upstreamBranch) remote
public def branchExists (rev : String) (repo : GitRepo) : BaseIO Bool := do
public def branchExists (rev : GitRev) (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["show-ref", "--verify", s!"refs/heads/{rev}"]
public def revisionExists (rev : String) (repo : GitRepo) : BaseIO Bool := do
public def revisionExists (rev : GitRev) (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["rev-parse", "--verify", rev ++ "^{commit}"]
public def getTags (repo : GitRepo) : BaseIO (List String) := do
let some out ← repo.captureGit? #["tag"] | return []
return out.split '\n' |>.toStringList
public def findTag? (rev : String := "HEAD") (repo : GitRepo) : BaseIO (Option String) := do
public def findTag? (rev : GitRev := .head) (repo : GitRepo) : BaseIO (Option String) := do
repo.captureGit? #["describe", "--tags", "--exact-match", rev]
public def getRemoteUrl?
(remote := Git.defaultRemote) (repo : GitRepo)
: BaseIO (Option String) := do repo.captureGit? #["remote", "get-url", remote]
: BaseIO (Option String) := repo.captureGit? #["remote", "get-url", remote]
public def addRemote (remote : String) (url : String) (repo : GitRepo) : LogIO Unit :=
repo.execGit #["remote", "add", remote, url]
public def setRemoteUrl (remote : String) (url : String) (repo : GitRepo) : LogIO Unit :=
repo.execGit #["remote", "set-url", remote, url]
public def getFilteredRemoteUrl?
(remote := Git.defaultRemote) (repo : GitRepo)
: BaseIO (Option String) := OptionT.run do Git.filterUrl? (← repo.getRemoteUrl? remote)
public def hasNoDiff (repo : GitRepo) : BaseIO Bool := do
repo.testGit #["diff", "HEAD", "--exit-code"]
repo.testGit #["diff", "--exit-code", "HEAD"]
@[inline] public def hasDiff (repo : GitRepo) : BaseIO Bool := do
not <$> repo.hasNoDiff