Stops out-of-date lab hosts from polluting the dataset with episodes
generated by buggy code. The valid-commits set mirrors the maintainer's
working clone on the Pi automatically — when the maintainer pulls or
pushes a new commit, the receiver picks it up within the 5-second
cache TTL with no service restart.
Receiver changes:
- receiver/version_gate.py (new): VersionGate(repo_path, window).
Each check() consults a frozenset of the last `window` commit
hashes from `git -C <repo> log --format=%H -n <window>`, refreshed
every 5s under a lock. Resilient to transient git failure (keeps
prior cache so a flaky `git` doesn't lock out every shipper).
- receiver/app.py: PUT extracts X-Cis490-Code-Commit; gate.check()
before ingest. Rejects with:
400 + remediation if header missing or malformed
412 + remediation + your_commit + head_commit if not in window
Remediation block is verbatim copy-pasteable into the lab-host
shell:
cd /opt/cis490 && sudo -u cis490 git pull origin main
sudo /opt/cis490/scripts/install-lab-host.sh
sudo systemctl restart cis490-orchestrator
- receiver/store.py: ingest_stream takes commit kwarg, stamps it on
the index.jsonl row (new optional field). Backfilled rows from
index_backfill.py also pull commit out of meta.json.
- receiver/config.py + etc/receiver.toml.example: new [version_gate]
section. enabled=true, repo_path=/home/max/cis490, window=100 by
default. Enabled toggle exists for emergency disable-and-collect.
Shipper changes:
- shipper/transport.py: ship_tarball() takes commit kwarg, sends
X-Cis490-Code-Commit header. 412 maps to status='fatal' so the
queue doesn't retry forever — the operator must pull and reinstall
before the next ship will succeed.
- shipper/queue.py: reads meta.json::code_version.commit per
episode, passes through. On 412, logs the receiver's full
remediation block at ERROR level so journalctl on the lab host
shows exactly what to run.
Tests: 9 in test_version_gate, including 2 end-to-end tests via
starlette.testclient and 2 that cover the boundaries where a new
commit lands mid-cache and where a missing repo gracefully keeps the
prior cache. 157/157 total.
Index schema: existing rows stay valid (commit field is optional
on read). New rows from receiver-direct AND from index_backfill.py
include commit.
222 lines
7.3 KiB
Python
222 lines
7.3 KiB
Python
"""Tests for the receiver's commit-allow-list gate.
|
|
|
|
The gate refreshes the allow-list from `git log` of a configured
|
|
repo path. Tests use real git operations on a temp repo so we
|
|
exercise the same subprocess code paths the receiver does in
|
|
production.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from receiver.version_gate import VersionGate
|
|
|
|
|
|
def _git(cwd: Path, *args: str) -> str:
    """Run ``git <args>`` against the directory *cwd* and return stdout.

    The command is executed with ``-C <cwd>`` and with ``user.email`` /
    ``user.name`` supplied via ``-c`` (presumably so ``commit`` succeeds
    on machines with no git identity configured). Surrounding whitespace
    is stripped from the captured output.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    identity = ["-c", "user.email=t@t", "-c", "user.name=t"]
    command = ["git", *identity, "-C", str(cwd), *args]
    output = subprocess.check_output(command, text=True)
    return output.strip()
|
|
|
|
|
|
@pytest.fixture
def repo(tmp_path: Path) -> Path:
    """Create a git repo with one commit on ``main`` under *tmp_path*.

    The repo lives at ``tmp_path / "repo"`` and tracks a single file
    ``f`` whose content is ``"v1"``. Returns the repo directory.
    """
    root = tmp_path / "repo"
    root.mkdir()
    _git(root, "init", "--initial-branch=main")
    tracked = root / "f"
    tracked.write_text("v1")
    # Stage the file, then record the initial commit.
    for step in (("add", "f"), ("commit", "-m", "v1")):
        _git(root, *step)
    return root
|
|
|
|
|
|
def _commits(repo: Path) -> list[str]:
    """Return the full commit hashes printed by ``git log`` for *repo*.

    One hash per list entry, in the order ``git log`` emits them, so the
    first entry is the current HEAD.
    """
    log_output = _git(repo, "log", "--format=%H")
    return log_output.splitlines()
|
|
|
|
|
|
def test_check_accepts_head_commit(repo: Path) -> None:
    """The repo's HEAD hash passes the gate and is what head() reports."""
    gate = VersionGate(repo, window=10, cache_ttl_s=0)
    head_sha = _commits(repo)[0]
    accepted, why = gate.check(head_sha)
    assert accepted
    assert why is None
    assert gate.head() == head_sha
|
|
|
|
|
|
def test_check_rejects_unknown_commit(repo: Path) -> None:
    """A 40-character hash absent from the repo's log is refused."""
    gate = VersionGate(repo, window=10, cache_ttl_s=0)
    bogus_sha = "0" * 40
    accepted, why = gate.check(bogus_sha)
    assert not accepted
    assert why == "not-in-window"
|
|
|
|
|
|
def test_check_rejects_missing_commit(repo: Path) -> None:
    """Both ``None`` and the empty string are reported as "missing"."""
    gate = VersionGate(repo, window=10, cache_ttl_s=0)
    for absent in (None, ""):
        accepted, why = gate.check(absent)
        assert not accepted
        assert why == "missing"
|
|
|
|
|
|
def test_check_rejects_bad_format(repo: Path) -> None:
    """Strings that are not shaped like a commit hash yield "bad-format"."""
    gate = VersionGate(repo, window=10, cache_ttl_s=0)
    malformed = (
        "not-a-hash",
        "ABCDEF",  # too short, but valid hex
    )
    for candidate in malformed:
        accepted, why = gate.check(candidate)
        assert not accepted
        assert why == "bad-format"
|
|
|
|
|
|
def test_new_commit_after_pull_is_accepted_within_ttl(repo: Path) -> None:
    """The whole point: when the maintainer commits new code on the
    Pi, the receiver picks it up automatically without restart.

    The gate is exercised once against the old HEAD *before* the new
    commit is created, so its cache is known to hold the pre-commit
    allow-list. Without that priming call the later check would also be
    the first load, and the test would pass even if the gate never
    refreshed its cache.
    """
    g = VersionGate(repo, window=10, cache_ttl_s=0)
    # Prime the cache with the pre-commit history.
    old_head = _commits(repo)[0]
    ok, _ = g.check(old_head)
    assert ok
    assert g.head() == old_head
    # Add a new commit AFTER the gate has loaded its allow-list.
    (repo / "f").write_text("v2")
    _git(repo, "commit", "-am", "v2")
    new_head = _commits(repo)[0]
    assert new_head != old_head
    # cache_ttl_s=0 forces refresh on next check.
    ok, _ = g.check(new_head)
    assert ok
    assert g.head() == new_head
|
|
|
|
|
|
def test_window_limits_history(repo: Path) -> None:
    """Commits older than the configured window fall off the allow-list."""
    tracked = repo / "f"
    # Five commits on top of the fixture's initial one.
    for label in ("v2", "v3", "v4", "v5", "v6"):
        tracked.write_text(label)
        _git(repo, "commit", "-am", label)
    history = _commits(repo)
    assert len(history) == 6
    gate = VersionGate(repo, window=3, cache_ttl_s=0)
    inside, outside = history[:3], history[3:]
    # The three newest commits are accepted.
    for sha in inside:
        accepted, _ = gate.check(sha)
        assert accepted, f"{sha[:8]} should be in window"
    # The three oldest are rejected as outside the window.
    for sha in outside:
        accepted, why = gate.check(sha)
        assert not accepted
        assert why == "not-in-window"
|
|
|
|
|
|
def test_e2e_receiver_returns_412_for_unknown_commit(repo: Path, tmp_path: Path) -> None:
    """End-to-end: PUT with an out-of-window commit returns 412 with
    the remediation block and leaves the index empty; the same PUT
    stamped with HEAD is then accepted (201) and indexed with that
    commit."""
    import hashlib
    import io
    import json
    import tarfile

    from starlette.testclient import TestClient

    from receiver.app import make_app
    from receiver.store import EpisodeStore

    head = _commits(repo)[0]
    rcv_root = tmp_path / "rcv"
    store = EpisodeStore(
        store_root=rcv_root / "ep",
        incoming_root=rcv_root / "in",
        index_path=rcv_root / "index.jsonl",
    )
    gate = VersionGate(repo, window=10, cache_ttl_s=0)
    app = make_app(store=store, max_episode_bytes=10_000_000,
                   bearer_token=None, version_gate=gate)

    # Build a tiny valid tarball holding a single empty-object meta.json.
    raw = io.BytesIO()
    with tarfile.open(fileobj=raw, mode="w") as archive:
        member = tarfile.TarInfo("01TEST/meta.json")
        member_bytes = b"{}"
        member.size = len(member_bytes)
        archive.addfile(member, io.BytesIO(member_bytes))
    payload = raw.getvalue()
    sha = hashlib.sha256(payload).hexdigest()

    url = "/v1/episodes/lab1/01TEST.tar.zst"
    # Headers shared by both requests; only the commit header differs.
    common_headers = {
        "X-Content-SHA256": sha,
        "X-Lab-Host": "lab1",
    }

    with TestClient(app) as client:
        # Wrong commit: rejected with 412 + remediation in body.
        bad = "0" * 40
        r = client.put(
            url,
            content=payload,
            headers={**common_headers, "X-Cis490-Code-Commit": bad},
        )
        assert r.status_code == 412
        rejection = r.json()
        assert "remediation" in rejection
        assert rejection["your_commit"] == bad
        assert rejection["head_commit"] == head
        # Index must NOT have grown.
        assert store.index_path.read_text() == ""

        # Right commit: accepted (201).
        r = client.put(
            url,
            content=payload,
            headers={**common_headers, "X-Cis490-Code-Commit": head},
        )
        assert r.status_code == 201, r.text
        # Index gained one row stamped with the commit.
        rows = [
            json.loads(line)
            for line in store.index_path.read_text().splitlines()
            if line.strip()
        ]
        assert len(rows) == 1
        assert rows[0]["commit"] == head
|
|
|
|
|
|
def test_e2e_receiver_returns_400_when_commit_header_missing(repo: Path, tmp_path: Path) -> None:
    """Missing header is a client bug (lab host pre-stamp-update);
    the receiver answers 400 and its ``error`` field mentions
    "missing"."""
    import hashlib
    import io
    import tarfile

    from starlette.testclient import TestClient

    from receiver.app import make_app
    from receiver.store import EpisodeStore

    rcv_root = tmp_path / "rcv"
    store = EpisodeStore(
        store_root=rcv_root / "ep",
        incoming_root=rcv_root / "in",
        index_path=rcv_root / "index.jsonl",
    )
    gate = VersionGate(repo, window=10, cache_ttl_s=0)
    app = make_app(store=store, max_episode_bytes=10_000_000,
                   bearer_token=None, version_gate=gate)

    # Build a tiny tarball holding a single empty-object meta.json.
    raw = io.BytesIO()
    with tarfile.open(fileobj=raw, mode="w") as archive:
        member = tarfile.TarInfo("01TEST/meta.json")
        member_bytes = b"{}"
        # Derive the size from the bytes instead of hard-coding 2.
        member.size = len(member_bytes)
        archive.addfile(member, io.BytesIO(member_bytes))
    payload = raw.getvalue()
    sha = hashlib.sha256(payload).hexdigest()

    with TestClient(app) as client:
        r = client.put(
            "/v1/episodes/lab1/01TEST.tar.zst",
            content=payload,
            headers={
                "X-Content-SHA256": sha,
                "X-Lab-Host": "lab1",
                # no X-Cis490-Code-Commit
            },
        )
        assert r.status_code == 400
        assert "missing" in r.json()["error"].lower()
|
|
|
|
|
|
def test_missing_repo_keeps_prior_cache(repo: Path) -> None:
    """If the maintainer's clone disappears (or git fails), the gate
    keeps its last-known allow-list — better than locking out every
    shipper at once."""
    gate = VersionGate(repo, window=10, cache_ttl_s=0)
    head_sha = _commits(repo)[0]
    # First check runs against the real repo and must succeed.
    primed, _ = gate.check(head_sha)
    assert primed
    # Point the gate at a path that does not exist.
    gate.repo_path = repo / "does-not-exist"
    # The previously accepted head must still be accepted.
    still_ok, _ = gate.check(head_sha)
    assert still_ok
|