"""Run cis490_doctor.py and PUT the JSON output to the receiver. Triggered by cis490-doctor-check.timer (once a day) or invoked by hand. Best-effort: a doctor that exits with red rows still ships its output — that's the most useful case. Reuses the shipper's transport (mTLS + bearer + receiver URL from lab-host.toml) so we don't reimplement auth. Failure modes: - doctor crashes → exit 2, log error - PUT fails (non-2xx) → exit 1, log error (timer fires next day) - PUT succeeds → exit 0 - mTLS not yet on disk → exit 0 (silent — first-boot path) """ from __future__ import annotations import json import logging import subprocess import sys from pathlib import Path import httpx from shipper.config import ShipperConfig from shipper.transport import ShipperTransport, _build_ssl_context, _CertNotReadyError log = logging.getLogger("cis490.shipper.health-check") def run_doctor(doctor_path: Path, role: str = "lab-host") -> dict: """Run cis490_doctor.py --json --role lab-host. Returns the parsed JSON (which always has a `checks` array — even when reds are present, the doctor exits non-zero but still prints the report). Raises RuntimeError if the doctor crashed without printing JSON.""" venv_py = Path("/opt/cis490/.venv/bin/python") py = str(venv_py) if venv_py.exists() else sys.executable rc = subprocess.run( [py, str(doctor_path), "--role", role, "--json"], capture_output=True, text=True, timeout=120, ) # Doctor exits non-zero when red rows are present — that's # exactly when we MOST want to ship the snapshot. Don't gate on # exit code; gate on whether parseable JSON came out. try: return json.loads(rc.stdout) except json.JSONDecodeError as e: raise RuntimeError( f"doctor produced no JSON (exit={rc.returncode}, " f"stderr={rc.stderr[:500]!r})" ) from e def ship_health(cfg: ShipperConfig, snapshot: dict) -> tuple[int, str]: """PUT snapshot to /v1/host-health/. Reuses the shipper's SSL context build so we get mTLS + the cert-not-ready deferral behaviour for free.""" try: verify = _build_ssl_context(cfg.receiver) except _CertNotReadyError as e: log.info("mTLS material not on disk yet; skipping health ship: %s", e) return 0, "deferred" url = f"{cfg.receiver.url}/v1/host-health/{cfg.host_id}" headers = {"X-Lab-Host": cfg.host_id, "Content-Type": "application/json"} if cfg.receiver.bearer_token: headers["Authorization"] = f"Bearer {cfg.receiver.bearer_token}" try: with httpx.Client(verify=verify, timeout=cfg.request_timeout_s) as c: r = c.put(url, headers=headers, content=json.dumps(snapshot)) except httpx.HTTPError as e: return 1, f"HTTP error: {e}" if 200 <= r.status_code < 300: return 0, f"ok ({r.status_code})" return 1, f"non-2xx: {r.status_code} {r.text[:200]}" def main(argv: list[str] | None = None) -> int: import argparse p = argparse.ArgumentParser(prog="cis490-ship-health-check") p.add_argument("--config", default="/etc/cis490/lab-host.toml") p.add_argument("--doctor", default="/opt/cis490/tools/cis490_doctor.py", type=Path) p.add_argument("--log-level", default="INFO") args = p.parse_args(argv) logging.basicConfig( level=getattr(logging, args.log_level.upper(), logging.INFO), format="%(asctime)s %(levelname)s %(name)s %(message)s", ) try: cfg = ShipperConfig.load(args.config) except (FileNotFoundError, ValueError) as e: log.error("config error: %s", e) return 2 try: snapshot = run_doctor(args.doctor) except (RuntimeError, subprocess.TimeoutExpired, FileNotFoundError) as e: log.error("doctor failed: %s", e) return 2 rc, msg = ship_health(cfg, snapshot) log.info("health ship: %s", msg) return rc if __name__ == "__main__": sys.exit(main())