Initial git-log-based gate ran into a permission wall: the cis490
service user can't read /home/max/cis490/.git (ProtectHome=true +
home-dir mode). Switching the production source to the local Forgejo
HTTP API (already accessible to all WG peers, and the single source of
truth that both lab hosts and the receiver pull from). When the maintainer
pushes new code to spectral/CIS490, the next 5-second cache refresh
sees the new commit and lab hosts can immediately ship under it.
VersionGate now takes either:
- forgejo_url + repo_owner + repo_name + branch (+ optional
auth_token for private repos): hits
/api/v1/repos/<owner>/<name>/commits?sha=<branch>&limit=<n>
- repo_path: dev-only fallback, runs `git log` locally
Local-git path retained for tests + the dev-only case.
receiver.toml.example gains forgejo_url/repo_owner/repo_name/branch
with auth_token commented; live-deployed receiver.toml on the Pi has
the spectral org + token.
Live state on the Pi: 41 valid hashes loaded, head=f8ad02b. Verified
end-to-end:
bogus commit → 412 + remediation
HEAD commit → clears gate (fails downstream at sha-mismatch as
expected for the empty-body verify probe)
Test added: test_forgejo_backend_accepts_returned_commits stands up
a tiny canned-response HTTPServer in-process, exercises the parser
without depending on a live Forgejo instance. Brings test_version_gate
to 10 cases; total 158/158.
175 lines
6.6 KiB
Python
175 lines
6.6 KiB
Python
"""Live commit allow-list for the receiver.
|
|
|
|
The receiver only stores episodes whose `meta.json::code_version.commit`
|
|
matches a commit in the canonical repository's recent history. Two
|
|
backends are supported:
|
|
|
|
forgejo: queries
|
|
GET /api/v1/repos/<owner>/<name>/commits?sha=<branch>&limit=<n>
|
|
on a Forgejo instance the maintainer pushes to. PRODUCTION
|
|
DEFAULT — Forgejo is the authoritative source of truth that
|
|
both lab hosts and the receiver pull from, so when the
|
|
maintainer pushes new code the new commit becomes acceptable
|
|
automatically.
|
|
|
|
git: runs `git log -n <window> --format=%H` against a local
|
|
checkout. Used by tests + dev-only setups where a Forgejo
|
|
instance isn't available.
|
|
|
|
Cache TTL: 5s by default — push a commit, wait 5s, the new hash is
|
|
in the allow-list. No service restart.
|
|
|
|
Episodes from older code (before a known bug fix) get rejected with
|
|
HTTP 412 + a remediation block telling the lab-host operator to pull
|
|
main and re-run the install. That keeps bad data out of the index.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import subprocess
|
|
import threading
|
|
import time
|
|
import urllib.parse
|
|
import urllib.request
|
|
from pathlib import Path
|
|
|
|
|
|
log = logging.getLogger("cis490.receiver.version_gate")
|
|
|
|
|
|
class VersionGate:
    """Maintain the set of acceptable commit hashes.

    The allow-list is the most recent ``window`` commits of a branch,
    obtained from either a Forgejo HTTP API call or a local ``git log``,
    and cached for ``cache_ttl_s`` seconds.

    Args:
        repo_path: local checkout (fallback / test backend). Used only
            when ``forgejo_url`` is unset or empty.
        window: how many recent commits count as valid.
        cache_ttl_s: how long to trust the cache before refreshing.
        forgejo_url: e.g. "http://10.100.0.1:3000". Setting this enables
            the Forgejo backend; ``repo_owner`` and ``repo_name`` must
            then also be set.
        repo_owner: owner (user or org) of the Forgejo repository.
        repo_name: name of the Forgejo repository.
        branch: branch whose history defines the allow-list.
        auth_token: optional API token, sent as
            ``Authorization: token <auth_token>``.

    Raises:
        ValueError: if neither ``forgejo_url`` nor ``repo_path`` is
            given, or if ``forgejo_url`` is given without both
            ``repo_owner`` and ``repo_name``.
    """

    # Characters permitted in a normalised (lower-cased) commit hash.
    _HEX_DIGITS = frozenset("0123456789abcdef")

    def __init__(
        self,
        repo_path: Path | None = None,
        *,
        window: int = 100,
        cache_ttl_s: float = 5.0,
        forgejo_url: str | None = None,
        repo_owner: str | None = None,
        repo_name: str | None = None,
        branch: str = "main",
        auth_token: str | None = None,
    ) -> None:
        self.repo_path = Path(repo_path) if repo_path else None
        self.window = int(window)
        self.cache_ttl_s = float(cache_ttl_s)
        self.forgejo_url = forgejo_url.rstrip("/") if forgejo_url else None
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.branch = branch
        self.auth_token = auth_token
        if not self.forgejo_url and not self.repo_path:
            raise ValueError("VersionGate needs forgejo_url or repo_path")
        if self.forgejo_url and not (repo_owner and repo_name):
            # Fail fast: otherwise every refresh would silently query
            # /repos/None/None/commits and reject all uploads.
            raise ValueError(
                "VersionGate forgejo backend needs repo_owner and repo_name"
            )
        self._lock = threading.Lock()
        self._cached_hashes: frozenset[str] = frozenset()
        # -inf means "never refreshed". time.monotonic() has an arbitrary
        # origin, so a 0.0 sentinel could look fresh when cache_ttl_s is
        # larger than the current clock value and suppress the first
        # refresh entirely.
        self._cached_at: float = float("-inf")
        self._head: str | None = None

    # ---- backend dispatch -----------------------------------------------

    def _refresh(self) -> None:
        """Reload the allow-list from the configured backend.

        An empty result (backend failure or no usable rows) keeps the
        previous cache but still stamps the refresh time, so a failing
        backend is retried at most once per ``cache_ttl_s``.
        """
        if self.forgejo_url:
            hashes, head = self._refresh_forgejo()
        else:
            hashes, head = self._refresh_git()
        if not hashes:
            log.warning("version-gate refresh empty; keeping prior cache "
                        "of %d hashes", len(self._cached_hashes))
            with self._lock:
                self._cached_at = time.monotonic()
            return
        with self._lock:
            self._cached_hashes = frozenset(hashes)
            self._cached_at = time.monotonic()
            self._head = head
        log.info("version-gate refreshed: %d valid hashes, head=%s, source=%s",
                 len(hashes), head[:12] if head else "?",
                 "forgejo" if self.forgejo_url else "git")

    def _refresh_forgejo(self) -> tuple[set[str], str | None]:
        """GET /api/v1/repos/<owner>/<name>/commits?sha=<branch>&limit=<n>.

        Returns ``(hashes, head)``. On any transport, decoding or parsing
        failure a warning is logged and ``(set(), <current head>)`` is
        returned so the caller keeps the prior cache.
        """
        # Function-scope import: http.client is not among the module's
        # top-level imports, and HTTPException (e.g. IncompleteRead while
        # reading the body) is not an OSError subclass.
        import http.client

        owner = urllib.parse.quote(str(self.repo_owner), safe="")
        name = urllib.parse.quote(str(self.repo_name), safe="")
        url = (
            f"{self.forgejo_url}/api/v1/repos/{owner}/{name}/commits"
            f"?sha={urllib.parse.quote(self.branch)}&limit={self.window}"
        )
        try:
            # Request() itself raises ValueError for a scheme-less URL,
            # so it lives inside the try as well.
            req = urllib.request.Request(url)
            if self.auth_token:
                req.add_header("Authorization", f"token {self.auth_token}")
            with urllib.request.urlopen(req, timeout=3) as r:
                rows = json.loads(r.read().decode("utf-8"))
        except (OSError, ValueError, http.client.HTTPException) as e:
            # OSError covers HTTPError/URLError/socket timeouts;
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            log.warning("forgejo refresh failed (%s); keeping prior cache", e)
            return set(), self._head
        return self._parse_forgejo_rows(rows)

    @staticmethod
    def _parse_forgejo_rows(rows: object) -> tuple[set[str], str | None]:
        """Extract ``(hashes, head)`` from a decoded commits response.

        Only 40-character string ``sha`` values of dict rows are kept
        (lower-cased). ``head`` is the first row's hash when that row is
        usable, else None. Anything that is not a list yields an empty
        result instead of raising.
        """
        if not isinstance(rows, list):
            return set(), None
        hashes: set[str] = set()
        head: str | None = None
        for i, row in enumerate(rows):
            sha = row.get("sha") if isinstance(row, dict) else None
            if isinstance(sha, str) and len(sha) == 40:
                sha = sha.lower()
                hashes.add(sha)
                if i == 0:
                    head = sha
        return hashes, head

    def _refresh_git(self) -> tuple[set[str], str | None]:
        """`git log -n <window> --format=%H` from `repo_path`.

        Returns ``(hashes, head)`` with ``head`` being the first line of
        output. On failure a warning is logged and
        ``(set(), <current head>)`` is returned.
        """
        try:
            out = subprocess.run(
                ["git", "-C", str(self.repo_path),
                 "log", f"-n{self.window}", "--format=%H"],
                check=True, capture_output=True, text=True, timeout=3,
            ).stdout
        except (subprocess.SubprocessError, FileNotFoundError, OSError) as e:
            log.warning("git refresh failed (%s); keeping prior cache", e)
            return set(), self._head
        lines = [h.strip().lower() for h in out.splitlines() if h.strip()]
        head = lines[0] if lines else None
        return set(lines), head

    def _maybe_refresh(self) -> None:
        """Refresh the cache if it is older than ``cache_ttl_s``."""
        if (time.monotonic() - self._cached_at) > self.cache_ttl_s:
            self._refresh()

    def head(self) -> str | None:
        """Return the most recent valid commit (HEAD of the branch
        the receiver is mirroring). Used by the 412 response so the
        client knows what to pull to."""
        self._maybe_refresh()
        return self._head

    def valid_count(self) -> int:
        """Return how many commit hashes are currently in the allow-list."""
        self._maybe_refresh()
        return len(self._cached_hashes)

    def check(self, commit: str | None) -> tuple[bool, str | None]:
        """Return (ok, reason). ``reason`` is None on success, a
        short string identifying the failure mode otherwise:
        "missing", "bad-format" or "not-in-window"."""
        if not commit:
            return False, "missing"
        if not isinstance(commit, str):
            # The value comes from uploaded metadata; a non-string must be
            # a clean rejection, not an AttributeError on .strip().
            return False, "bad-format"
        c = commit.strip().lower()
        if len(c) != 40 or not self._HEX_DIGITS.issuperset(c):
            return False, "bad-format"
        self._maybe_refresh()
        with self._lock:
            allowed = self._cached_hashes
        if c in allowed:
            return True, None
        return False, "not-in-window"
|