Implements the deployment loop end-to-end on the CIS490 side:
shipper/
config.py ShipperConfig (host_id, paths, receiver endpoint, mTLS)
transport.py httpx-based PUT + ping with mTLS + bearer support
queue.py scan data/episodes/, tar+zstd via system zstd, ship,
retire to data/shipped/. Idempotent across crashes per
the state machine in docs/transport.md.
__main__.py CLI: --ping (smoke test), --once (one pass), or daemon
receiver/app.py: new POST /v1/ping that requires the same auth as PUT
/v1/episodes but writes nothing. Used by `cis490-shipper --ping`
during lab-host bring-up to verify the WG/Caddy/mTLS path before
shipping any real bytes.
etc/
cis490-shipper.service systemd unit for the lab-host shipper
cis490-orchestrator.service systemd unit for the lab-host queue
(kept disabled by default until queue
mode lands)
lab-host.toml.example config template
scripts/
install-lab-host.sh idempotent installer; verifies prereqs,
creates cis490 service user, syncs repo to
/opt/cis490, builds venv, drops systemd units
and config template
install-receiver.sh same, for the receiver role on the central WG
node (Pi5 in our setup)
tests/test_shipper.py 11 end-to-end tests against a real Uvicorn
server hosting the receiver app. Exercises
ping, tar+ship, idempotent re-ship, 409
conflict, transient (receiver down), tarball
round-trip via system zstd.
AGENTS.md guidance for AI agents working on this and sibling repos.
Headline: when you hit an issue you can't fully fix in
scope, file a Forgejo issue rather than leaving a TODO.
51/51 tests pass.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
203 lines
6.3 KiB
Python
203 lines
6.3 KiB
Python
"""HTTP transport for the lab-host shipper.
|
|
|
|
Two operations against the receiver:

    POST /v1/ping                               — smoke test
    PUT  /v1/episodes/<host>/<episode>.tar.zst  — episode upload

Auth is mTLS (client cert from wg-pki) when configured. A bearer token
is supported as a stand-in during early bring-up before the cert is
issued; production runs should set both.

The transport returns small dataclasses rather than throwing — the
caller (shipper queue) decides whether to retry, move to shipped/, or
alert. This keeps the retry policy in one place.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import logging
|
|
import ssl
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from .config import ReceiverEndpoint, ShipperConfig
|
|
|
|
|
|
log = logging.getLogger("cis490.shipper.transport")
|
|
|
|
|
|
SCHEMA_VERSION = 1
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class PingResult:
|
|
ok: bool
|
|
status_code: int
|
|
body: dict[str, Any] | None
|
|
error: str | None
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class ShipResult:
|
|
status: str # "stored" | "already-present" | "conflict" | "transient" | "fatal"
|
|
status_code: int
|
|
sha256: str | None
|
|
body: dict[str, Any] | None
|
|
error: str | None
|
|
|
|
|
|
def _build_ssl_context(rcv: ReceiverEndpoint) -> ssl.SSLContext | bool:
    """Build the value passed to httpx's ``verify=`` for this receiver.

    Args:
        rcv: Receiver endpoint settings. Reads ``url``, ``ca_bundle``,
            ``verify_tls``, ``client_cert`` and ``client_key``.

    Returns:
        ``False`` when ``rcv.url`` is not an ``https://`` URL (no TLS is
        involved, so there is nothing to verify). Otherwise an
        ``ssl.SSLContext`` that trusts ``rcv.ca_bundle`` when one is set
        (``ssl.create_default_context`` falls back to its defaults when it
        is not) and that presents the client certificate for mTLS when both
        ``rcv.client_cert`` and ``rcv.client_key`` are set.

    Raises:
        OSError, ssl.SSLError: Propagated from the ``ssl`` module when the
            CA bundle or the client cert/key cannot be loaded.
    """
    if not rcv.url.lower().startswith("https://"):
        return False

    ctx = ssl.create_default_context(
        cafile=str(rcv.ca_bundle) if rcv.ca_bundle else None,
    )

    if not rcv.verify_tls:
        # Dev-only path; production lab-hosts should always pin the
        # wg-pki CA. Logged loudly so it doesn't slip through.
        log.warning("TLS verification disabled — dev-only configuration")
        # Order matters: ssl rejects CERT_NONE while check_hostname is on.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

    if rcv.client_cert and rcv.client_key:
        ctx.load_cert_chain(str(rcv.client_cert), str(rcv.client_key))
    elif rcv.client_cert or rcv.client_key:
        # Only half of the cert/key pair is configured. Previously this was
        # skipped silently, which surfaces later as an opaque auth failure
        # at the receiver; make the misconfiguration visible instead.
        log.warning(
            "mTLS is half-configured (client_cert set: %s, client_key set: %s); "
            "no client certificate will be presented",
            bool(rcv.client_cert),
            bool(rcv.client_key),
        )
    return ctx
|
|
|
|
|
|
class ShipperTransport:
    """Synchronous HTTP client for the receiver's ping and upload endpoints.

    Each call opens a short-lived ``httpx.Client`` using the TLS settings
    derived from ``cfg.receiver`` at construction time. Failures that are
    ``httpx.HTTPError`` subclasses, and every HTTP status, are reported
    through ``PingResult`` / ``ShipResult`` instead of being raised, so the
    caller owns the retry policy.
    """

    # 4xx statuses that mean "try again later" rather than "the request is
    # wrong": 408 Request Timeout and 429 Too Many Requests.
    _RETRYABLE_CLIENT_STATUSES = frozenset({408, 429})

    def __init__(self, cfg: ShipperConfig) -> None:
        """Store ``cfg`` and precompute the ``verify=`` value for httpx."""
        self.cfg = cfg
        self._verify = _build_ssl_context(cfg.receiver)

    # ---- ping ----------------------------------------------------------

    def ping(self) -> PingResult:
        """POST an empty body to ``/v1/ping`` and report the outcome.

        Returns:
            ``PingResult(ok=True, ...)`` only when the receiver answers 200
            with a JSON object whose ``"ok"`` value is truthy. An
            ``httpx.HTTPError`` yields ``status_code=0`` with the exception
            text as ``error``; any other response yields ``ok=False`` with
            the actual status code.
        """
        url = self._endpoint("/v1/ping")
        headers = self._common_headers()
        try:
            with httpx.Client(verify=self._verify, timeout=self.cfg.request_timeout_s) as client:
                r = client.post(url, headers=headers, content=b"")
        except httpx.HTTPError as e:
            return PingResult(ok=False, status_code=0, body=None, error=str(e))

        body = self._json_body(r)
        if r.status_code == 200 and body is not None and body.get("ok"):
            return PingResult(ok=True, status_code=200, body=body, error=None)

        if r.status_code == 200:
            # The status is fine; it is the payload that failed the check.
            error = "status 200 but response body did not report ok"
        else:
            error = f"unexpected status {r.status_code}"
        return PingResult(ok=False, status_code=r.status_code, body=body, error=error)

    # ---- ship ----------------------------------------------------------

    def ship_tarball(
        self,
        episode_id: str,
        tarball_path: Path,
        sha256_hex: str,
    ) -> ShipResult:
        """PUT one episode tarball to the receiver, streaming it from disk.

        Args:
            episode_id: Episode identifier; used in the URL and sent as
                ``X-Episode-Id``.
            tarball_path: Path of the ``.tar.zst`` file to upload.
            sha256_hex: Hex SHA-256 of the tarball, sent as
                ``X-Content-SHA256``.

        Returns:
            A ``ShipResult`` whose ``status`` is:

            * ``"stored"`` for 201,
            * ``"already-present"`` for 200,
            * ``"conflict"`` for 409,
            * ``"transient"`` for ``httpx.HTTPError``, any 5xx, 408 or 429,
            * ``"fatal"`` for every other status.

            ``sha256`` echoes ``sha256_hex`` for the first three and is
            ``None`` otherwise.

        Raises:
            OSError: If ``tarball_path`` cannot be stat'ed or opened; file
                errors are not converted into a ``ShipResult``.
        """
        url = self._endpoint(f"/v1/episodes/{self.cfg.host_id}/{episode_id}.tar.zst")
        size = tarball_path.stat().st_size
        headers = self._common_headers() | {
            "Content-Type": "application/zstd",
            "Content-Length": str(size),
            "X-Content-SHA256": sha256_hex,
            "X-Episode-Id": episode_id,
        }

        try:
            with httpx.Client(verify=self._verify, timeout=self.cfg.request_timeout_s) as client:
                with tarball_path.open("rb") as stream:
                    # httpx streams from a file-like object via `content=`.
                    r = client.put(url, headers=headers, content=stream)
        except httpx.HTTPError as e:
            return ShipResult(
                status="transient",
                status_code=0,
                sha256=None,
                body=None,
                error=str(e),
            )

        status, error = self._classify_status(r.status_code)
        # Only definitive answers about this tarball carry its digest back.
        definitive = status in ("stored", "already-present", "conflict")
        return ShipResult(
            status=status,
            status_code=r.status_code,
            sha256=sha256_hex if definitive else None,
            body=self._json_body(r),
            error=error,
        )

    # ---- helpers -------------------------------------------------------

    def _endpoint(self, path: str) -> str:
        """Join the receiver base URL and ``path`` without doubling ``/``."""
        return f"{self.cfg.receiver.url.rstrip('/')}{path}"

    @staticmethod
    def _json_body(response: httpx.Response) -> dict[str, Any] | None:
        """Return the response's JSON object, or None if absent or not a dict."""
        try:
            parsed = response.json()
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError: the body is
            # simply not JSON (e.g. an HTML error page from a proxy).
            return None
        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _classify_status(status_code: int) -> tuple[str, str | None]:
        """Map an upload response status to ``(ShipResult.status, error)``."""
        if status_code == 201:
            return "stored", None
        if status_code == 200:
            return "already-present", None
        if status_code == 409:
            return "conflict", "receiver already has a different sha256 for this id"
        if 500 <= status_code < 600:
            return "transient", f"server error {status_code}"
        if status_code in ShipperTransport._RETRYABLE_CLIENT_STATUSES:
            # Timeouts and rate limiting say nothing about the request itself.
            return "transient", f"retryable client error {status_code}"
        if 400 <= status_code < 500:
            # 4xx other than the above: caller-side bug — don't retry.
            return "fatal", f"client error {status_code}"
        # 1xx / other 2xx / 3xx: not part of the receiver contract.
        return "fatal", f"unexpected status {status_code}"

    def _common_headers(self) -> dict[str, str]:
        """Headers sent on every request: host id, schema version, bearer."""
        h: dict[str, str] = {
            "X-Lab-Host": self.cfg.host_id,
            "X-Schema-Version": str(SCHEMA_VERSION),
        }
        if self.cfg.receiver.bearer_token:
            h["Authorization"] = f"Bearer {self.cfg.receiver.bearer_token}"
        return h
|
|
|
|
|
|
def hash_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is consumed in 1 MiB reads, so the whole file is never held in
    memory at once.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while block := stream.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()
|