CIS490/shipper/config.py
max 7c9f9582ca Lab-host shipper + receiver /v1/ping + install scripts
Implements the deployment loop end-to-end on the CIS490 side:

shipper/
  config.py      ShipperConfig (host_id, paths, receiver endpoint, mTLS)
  transport.py   httpx-based PUT + ping with mTLS + bearer support
  queue.py       scan data/episodes/, tar+zstd via system zstd, ship,
                 retire to data/shipped/. Idempotent across crashes per
                 the state machine in docs/transport.md.
  __main__.py    CLI: --ping (smoke test), --once (one pass), or daemon

receiver/app.py: new POST /v1/ping that requires the same auth as PUT
  /v1/episodes but writes nothing. Used by `cis490-shipper --ping`
  during lab-host bring-up to verify the WG/Caddy/mTLS path before
  shipping any real bytes.

etc/
  cis490-shipper.service       systemd unit for the lab-host shipper
  cis490-orchestrator.service  systemd unit for the lab-host queue
                               (kept disabled by default until queue
                               mode lands)
  lab-host.toml.example        config template

scripts/
  install-lab-host.sh   idempotent installer; verifies prereqs,
                        creates cis490 service user, syncs repo to
                        /opt/cis490, builds venv, drops systemd units
                        and config template
  install-receiver.sh   same, for the receiver role on the central WG
                        node (Pi5 in our setup)

tests/test_shipper.py  11 end-to-end tests against a real Uvicorn
                       server hosting the receiver app. Exercises
                       ping, tar+ship, idempotent re-ship, 409
                       conflict, transient (receiver down), tarball
                       round-trip via system zstd.

AGENTS.md  guidance for AI agents working on this and sibling repos.
           Headline: when you hit an issue you can't fully fix in
           scope, file a Forgejo issue rather than leaving a TODO.

51/51 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 23:41:32 -05:00

91 lines
3.2 KiB
Python

"""Lab-host shipper config — loaded from /etc/cis490/lab-host.toml."""
from __future__ import annotations
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
@dataclass(frozen=True)
class ReceiverEndpoint:
url: str # e.g. "https://collector.wg"
ca_bundle: Path | None = None
client_cert: Path | None = None
client_key: Path | None = None
bearer_token: str | None = None
verify_tls: bool = True
@dataclass(frozen=True)
class ShipperConfig:
host_id: str
data_root: Path # Lab-host data root; episodes/, outbox/, shipped/ live here.
receiver: ReceiverEndpoint
# Daemon mode: how often to scan for new done.marker files.
scan_interval_s: float = 5.0
# PUT timeout per episode. Tarballs are bounded by max_episode_bytes;
# at WG speeds this is well under 60s for a typical episode.
request_timeout_s: float = 60.0
# Backoff schedule on transient (5xx / network) failures, in seconds,
# capped at the last entry. The shipper's scan loop will pick the
# episode up again on the next pass regardless.
backoff_seconds: tuple[float, ...] = (1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 60.0, 120.0, 300.0)
# Local retention before pruning data/shipped/.
keep_local_for_days: int = 7
@property
def episodes_dir(self) -> Path:
return self.data_root / "episodes"
@property
def outbox_dir(self) -> Path:
return self.data_root / "outbox"
@property
def shipped_dir(self) -> Path:
return self.data_root / "shipped"
@classmethod
def load(cls, path: str | Path) -> "ShipperConfig":
with open(path, "rb") as f:
data = tomllib.load(f)
host_id = data.get("host_id")
if not isinstance(host_id, str) or not host_id:
raise ValueError("lab-host config: host_id (string) required at top level")
paths = data.get("paths", {})
data_root = Path(paths.get("data_root", "/var/lib/cis490/data")).resolve()
rcv = data.get("receiver", {})
url = rcv.get("url")
if not isinstance(url, str) or not url:
raise ValueError("lab-host config: receiver.url required")
receiver = ReceiverEndpoint(
url=url.rstrip("/"),
ca_bundle=_optional_path(rcv.get("ca_bundle")),
client_cert=_optional_path(rcv.get("client_cert")),
client_key=_optional_path(rcv.get("client_key")),
bearer_token=rcv.get("bearer_token"),
verify_tls=bool(rcv.get("verify_tls", True)),
)
retention = data.get("retention", {})
return cls(
host_id=host_id,
data_root=data_root,
receiver=receiver,
scan_interval_s=float(data.get("shipper", {}).get("scan_interval_s", 5.0)),
request_timeout_s=float(data.get("shipper", {}).get("request_timeout_s", 60.0)),
keep_local_for_days=int(retention.get("keep_local_for_days", 7)),
)
def _optional_path(v: object) -> Path | None:
if v in (None, ""):
return None
if isinstance(v, str):
return Path(v).expanduser()
raise TypeError(f"expected path string, got {type(v).__name__}")