# Level names uvicorn documents for ``log_level``. They are lowercase, so the
# user's spelling is normalised before it is handed to ``uvicorn.run``.
_UVICORN_LOG_LEVELS = ("critical", "error", "warning", "info", "debug", "trace")


def main(argv: list[str] | None = None) -> int:
    """Parse CLI options, sanity-check the issuer script, and serve the app.

    Args:
        argv: Argument vector without the program name. ``None`` lets
            argparse fall back to ``sys.argv[1:]``.

    Returns:
        ``2`` when ``--issuer-script`` is not an executable regular file;
        ``0`` once ``uvicorn.run`` returns.
    """
    import os  # function-scope: only the executable-bit check needs it

    p = argparse.ArgumentParser(prog="cis490-bootstrap")
    p.add_argument("--listen-host", default="127.0.0.1")
    p.add_argument("--listen-port", type=int, default=8446)
    p.add_argument(
        "--issuer-script",
        type=Path,
        default=Path("/home/max/.env/wg-pki/scripts/issue-cis490-client-cert.sh"),
        help="Path to the wg-pki leaf-cert mint script.",
    )
    p.add_argument(
        "--issued-root",
        type=Path,
        default=Path("/home/max/.env/wg-pki/issued"),
        help="Where minted tarballs are cached.",
    )
    p.add_argument("--log-level", default="info")
    args = p.parse_args(argv)

    # Accept any capitalisation ("INFO", "Info", ...). An unrecognised name
    # falls back to "info" for both logging and uvicorn instead of configuring
    # logging and then failing inside uvicorn at startup.
    level_name = args.log_level.lower()
    level_known = level_name in _UVICORN_LOG_LEVELS
    if not level_known:
        level_name = "info"

    logging.basicConfig(
        # ``logging`` has no TRACE level, so "trace" resolves to INFO here.
        level=getattr(logging, level_name.upper(), logging.INFO),
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )
    log = logging.getLogger("cis490.bootstrap.main")
    if not level_known:
        log.warning("unknown --log-level %r; using info", args.log_level)

    # Fail fast at startup rather than on the first mint: a directory or a
    # file without the executable bit would pass a bare ``exists()`` check.
    issuer = args.issuer_script
    if not issuer.is_file():
        log.error("issuer script missing: %s", issuer)
        return 2
    if not os.access(issuer, os.X_OK):
        log.error("issuer script not executable: %s", issuer)
        return 2

    app = make_app(
        issuer_script=issuer,
        issued_root=args.issued_root,
    )
    log.info("listening on %s:%d", args.listen_host, args.listen_port)
    uvicorn.run(
        app,
        host=args.listen_host,
        port=args.listen_port,
        log_level=level_name,
        access_log=True,
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
file mode 100644 index 0000000..35a360e --- /dev/null +++ b/bootstrap/app.py @@ -0,0 +1,146 @@ +"""``cis490-bootstrap`` — auto-issue mTLS leaf certs to enrolled lab hosts. + +This is the chicken-and-egg fix for first-time lab-host setup. A +freshly wg-enrolled device has WG access (and trusts the wg-pki CA) +but has no client cert yet, so it can't authenticate to the +mTLS-protected ``collector.wg``. This service exposes a *plain-TLS* +(no client-auth) endpoint that the lab host can call once during +``install-lab-host.sh`` to retrieve its leaf cert tarball. + +Trust boundary: anything that reaches ``bootstrap.wg`` has already +passed iptmonads' WG-membership check at L4. No further +authentication is required for the bootstrap pull — by the time a +caller can connect at all they're a peer the operator authorized. + +The privilege boundary, on the other hand, is real: minting certs +requires the wg-pki CA private key (root-only at +``/var/lib/wg-pki/cis490-client-ca/ca.key``). This service therefore +runs as root in a tight sandbox (see ``etc/cis490-bootstrap.service``) +and shells out to ``issue-cis490-client-cert.sh`` for each mint. + +Endpoints: + + GET /v1/cert/{host_id} — return tarball of {ca.crt, leaf.pem, leaf.key} + for ``host_id``. Cached — successive calls + return the same bytes. + GET /v1/health — liveness probe (no auth needed). + +Each mint is logged with the source IP (after Caddy's X-Real-IP +forward) so the operator has an audit trail of which devices have +fetched which certs. 
log = logging.getLogger("cis490.bootstrap")


# Sane host_id charset, mirrored here so mint requests can't smuggle path
# traversal in. ``\Z`` instead of ``$`` because ``$`` also matches just
# before a trailing newline, which would let "name\n" through.
_HOST_ID_RE = re.compile(r"^[A-Za-z0-9_.-]{1,64}\Z")

# Size at which the per-IP rate-limit table sheds expired entries.
_RATE_TABLE_MAX = 4096


def _is_valid_host_id(s: str) -> bool:
    """Return True when ``s`` is safe to use as a host id and path component.

    The charset allows dots, so names made only of dots ("." and "..") are
    rejected explicitly: joined onto ``issued_root`` they would address the
    cache root or its parent instead of a per-host directory.
    """
    if _HOST_ID_RE.fullmatch(s) is None:
        return False
    return s.strip(".") != ""


def _source_ip(request: Request) -> str:
    """Best-effort caller address for logging and rate limiting.

    Prefers ``X-Real-IP``, then the first ``X-Forwarded-For`` entry, then the
    direct peer (used when running without a proxy in front, e.g. in tests);
    ``"?"`` when none is available.
    """
    forwarded = (request.headers.get("x-forwarded-for") or "").split(",")[0].strip()
    return (
        request.headers.get("x-real-ip")
        or forwarded
        or (request.client.host if request.client else "?")
    )


def make_app(
    *,
    issuer_script: Path,
    issued_root: Path,
    rate_limit_window_s: float = 5.0,
) -> Starlette:
    """Build the Starlette app.

    Args:
        issuer_script: Executable invoked as
            ``issuer_script <host_id> --out-dir <issued_root>/<host_id>``
            whenever no cached tarball exists for the requested host.
        issued_root: Cache directory (created if missing). The tarball for a
            host is expected at ``<issued_root>/<host_id>/<host_id>.tar``.
        rate_limit_window_s: Minimum spacing, in seconds, between accepted
            ``/v1/cert`` requests from one source address.

    Returns:
        An app exposing ``GET /v1/health`` and ``GET /v1/cert/{host_id}``.
    """
    # Function-scope imports: only the mint path needs them.
    import asyncio

    from starlette.concurrency import run_in_threadpool

    issued_root.mkdir(parents=True, exist_ok=True)

    # Coarse per-IP rate limiter to make a casual scan annoying. Not
    # a real defense — the WG mesh is the actual perimeter.
    last_request: dict[str, float] = {}

    # Serialises issuer runs. Created lazily inside the handler so it is
    # bound to the event loop that actually serves requests.
    mint_lock = None

    def _rate_limited(src: str) -> bool:
        """Record an accepted request from ``src``; True if it is too soon."""
        now = time.monotonic()
        prev = last_request.get(src)
        # ``None`` means "never seen". A 0.0 default would reject every first
        # request while the monotonic clock is still below the window.
        if prev is not None and (now - prev) < rate_limit_window_s:
            return True
        if len(last_request) >= _RATE_TABLE_MAX:
            cutoff = now - rate_limit_window_s
            for ip in [k for k, seen in last_request.items() if seen < cutoff]:
                del last_request[ip]
        last_request[src] = now
        return False

    async def health(request: Request) -> Response:
        return JSONResponse({"status": "ok"})

    async def get_cert(request: Request) -> Response:
        nonlocal mint_lock

        host_id: str = request.path_params["host_id"]
        if not _is_valid_host_id(host_id):
            return JSONResponse({"error": "bad host_id"}, status_code=400)

        src = _source_ip(request)
        if _rate_limited(src):
            return JSONResponse(
                {"error": "rate limited; back off"},
                status_code=429,
            )

        host_dir = issued_root / host_id
        tar_path = host_dir / f"{host_id}.tar"
        if tar_path.exists():
            log.info("cache hit for host_id=%s src=%s", host_id, src)
        else:
            if mint_lock is None:
                mint_lock = asyncio.Lock()
            async with mint_lock:
                # Re-check under the lock: a request that was waiting may
                # find the tarball already minted by the previous holder.
                if not tar_path.exists():
                    log.info("minting cert for host_id=%s src=%s", host_id, src)
                    try:
                        # The issuer may run for up to 30 s; keep it off the
                        # event loop so /v1/health stays responsive.
                        await run_in_threadpool(
                            subprocess.run,
                            [
                                str(issuer_script), host_id,
                                "--out-dir", str(host_dir),
                            ],
                            check=True,
                            capture_output=True,
                            text=True,
                            timeout=30,
                        )
                    except subprocess.CalledProcessError as e:
                        detail = (e.stderr or "")[:500]
                        log.error("issue script failed for %s: rc=%d stderr=%s",
                                  host_id, e.returncode, detail)
                        return JSONResponse(
                            {"error": "mint failed", "detail": detail},
                            status_code=500,
                        )
                    except (OSError, subprocess.TimeoutExpired) as e:
                        log.exception("issue script transport error for %s", host_id)
                        return JSONResponse(
                            {"error": f"transport: {e}"},
                            status_code=500,
                        )

        if not tar_path.exists():
            return JSONResponse({"error": "tarball not produced"}, status_code=500)
        return FileResponse(
            tar_path,
            media_type="application/x-tar",
            filename=f"{host_id}.tar",
            headers={
                "X-Cis490-Host-Id": host_id,
                "X-Cis490-Cert-Source-IP": src,
            },
        )

    routes = [
        Route("/v1/health", health, methods=["GET"]),
        Route("/v1/cert/{host_id}", get_cert, methods=["GET"]),
    ]
    return Starlette(routes=routes)
+Fw0yNjA0MjYxMzE5NTZaFw0zNjAzMDQxMzE5NTZaMDAxLjAsBgNVBAMTJUNhZGR5 +IExvY2FsIEF1dGhvcml0eSAtIDIwMjYgRUNDIFJvb3QwWTATBgcqhkjOPQIBBggq +hkjOPQMBBwNCAASjU+sJ+rLPPtTK5t7MsKa6/WDknumPOgxy7uGwGATkd65cHTjz +zTH6+0+uJ7LPZFTJoPSB5WVHrEA0veY8AxH5o0UwQzAOBgNVHQ8BAf8EBAMCAQYw +EgYDVR0TAQH/BAgwBgEB/wIBATAdBgNVHQ4EFgQU8EarYtjVc2EvpYE6OPhDQlYB +docwCgYIKoZIzj0EAwIDSAAwRQIhANxALV9oKSAC4JEB/w1EctnzMfzLyueBpGoB +7p5I07LRAiAKQuhNMeTDSK3Qql+IjunH8UPidETNXfyInwMnbzgAaQ== +-----END CERTIFICATE----- diff --git a/etc/cis490-bootstrap.service b/etc/cis490-bootstrap.service new file mode 100644 index 0000000..cf0bd71 --- /dev/null +++ b/etc/cis490-bootstrap.service @@ -0,0 +1,44 @@ +[Unit] +Description=CIS490 mTLS bootstrap endpoint (auto-issue client certs to enrolled lab hosts) +Documentation=https://maxgit.wg/spectral/CIS490 +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +# Runs as root because the wg-pki CA private key is root-only. The +# service shells out to issue-cis490-client-cert.sh per mint and +# never touches anything else under /var/lib. +User=root +Group=root +WorkingDirectory=/opt/cis490 +ExecStart=/opt/cis490/.venv/bin/python -m bootstrap \ + --listen-host 127.0.0.1 \ + --listen-port 8446 \ + --issuer-script /opt/wg-pki/scripts/issue-cis490-client-cert-wrapper.sh \ + --issued-root /var/lib/wg-pki/issued +Restart=on-failure +RestartSec=5 + +# Hardening — narrower than receiver because this binary's only job +# is to call openssl + tar via the issuer script, then serve files. +NoNewPrivileges=true +PrivateTmp=true +ProtectSystem=strict +# /home/max/.env/wg-pki/scripts/ holds the issuer script the wrapper +# exec's. ProtectHome={read-only,tmpfs} both *hide* /home contents +# instead of restricting them to read-only — so we leave /home +# accessible. ProtectSystem=strict still keeps everything outside +# /var/lib/wg-pki write-protected. 
# --- 7. mTLS leaf cert (auto-fetch via bootstrap.wg) -------------------
# Pull our leaf cert from the Pi's bootstrap endpoint if it isn't
# already on disk. Trust boundary: "reached bootstrap.wg over WG"
# (iptmonads already filters non-peers from 443). Caddy's TLS cert
# is verified against the bundled etc/caddy-root.crt — no chicken-
# and-egg.
#
# The tarball carries the private key, so it is downloaded and unpacked
# inside a private mktemp directory (mode 0700) and the files are copied
# into place with explicit owner/mode. lab-host.pem is installed last
# because its presence is what marks this step as done on later runs.

# `|| true`: a missing file or an unmatched grep must reach the "skipping"
# branch below rather than abort the installer under `set -e`/`pipefail`.
HOST_ID="$(grep -E '^host_id\s*=' "$ETC_ROOT/lab-host.toml" 2>/dev/null \
    | head -1 | sed -E 's/^host_id\s*=\s*"([^"]+)".*/\1/' || true)"
# Same charset the bootstrap service accepts. Also catches the case where
# the sed pattern did not match and HOST_ID still holds the raw TOML line.
HOST_ID_RE='^[A-Za-z0-9_.-]{1,64}$'
if [[ -z "$HOST_ID" || "$HOST_ID" == "REPLACE_ME" ]]; then
    log "skipping cert auto-fetch: host_id not set in $ETC_ROOT/lab-host.toml"
elif [[ ! "$HOST_ID" =~ $HOST_ID_RE || -z "${HOST_ID//./}" ]]; then
    log "WARN: skipping cert auto-fetch: host_id '$HOST_ID' is not a valid id"
elif [[ ! -f "$ETC_ROOT/certs/lab-host.pem" ]]; then
    log "fetching leaf cert from https://bootstrap.wg/v1/cert/$HOST_ID"
    install -d -m 0755 -o root -g "$SERVICE_USER" "$ETC_ROOT/certs"
    STAGE="$(mktemp -d "${TMPDIR:-/tmp}/cis490-bootstrap.XXXXXX")"
    TAR="$STAGE/bundle.tar"
    if ! curl -fsS --cacert "$REPO_ROOT/etc/caddy-root.crt" \
            --connect-timeout 10 --max-time 60 \
            "https://bootstrap.wg/v1/cert/$HOST_ID" -o "$TAR"; then
        log "WARN: bootstrap.wg fetch failed — make sure /etc/hosts maps it"
        log "      to 10.100.0.1 and that wg0 is up. cert delivery skipped."
    elif ! tar -C "$STAGE" -xf "$TAR" \
            || [[ ! -f "$STAGE/ca.crt" || ! -f "$STAGE/$HOST_ID.pem" \
                  || ! -f "$STAGE/$HOST_ID.key" ]]; then
        log "WARN: bootstrap tarball is unreadable or missing ca.crt/$HOST_ID.pem/$HOST_ID.key;"
        log "      cert delivery skipped."
    else
        install -m 0644 -o root -g "$SERVICE_USER" \
            "$STAGE/ca.crt" "$ETC_ROOT/certs/wg-ca.pem"
        install -m 0640 -o root -g "$SERVICE_USER" \
            "$STAGE/$HOST_ID.key" "$ETC_ROOT/certs/lab-host.key"
        install -m 0644 -o root -g "$SERVICE_USER" \
            "$STAGE/$HOST_ID.pem" "$ETC_ROOT/certs/lab-host.pem"
        log "leaf cert installed for host_id=$HOST_ID"
    fi
    rm -rf "$STAGE"
else
    log "$ETC_ROOT/certs/lab-host.pem present; skipping auto-fetch"
fi
#!/usr/bin/env bash
# Wrapper that re-points the wg-pki issuer script's relative-path
# assumption (PWD-derived publish dir, $REPO_ROOT/issued/) to the
# absolute /var/lib/wg-pki/issued/ that the bootstrap service uses.
#
# wg-pki ships the actual issuer as issue-cis490-client-cert.sh, which
# computes paths relative to its own location. This wrapper sets
# WG_PKI_STATE so the CA key is found in /var/lib/wg-pki, and forces
# --out-dir to a path under /var/lib/wg-pki — the only tree the
# cis490-bootstrap unit leaves writable (ProtectSystem=strict with
# ReadWritePaths=/var/lib/wg-pki).
#
# Usage: issue-cis490-client-cert-wrapper.sh <host_id> [--out-dir DIR] [--days N]
#   <host_id>      1-64 chars of [A-Za-z0-9_.-], not made only of dots
#   --out-dir DIR  accepted for call compatibility but ignored
#   anything else  passed through to the issuer unchanged
# Exit status: 2 on usage/lookup errors, otherwise the issuer's own status.

set -euo pipefail

# Resolve issuer path: an explicit WG_PKI_ISSUER wins; otherwise prefer the
# install-time copy at /opt/wg-pki/, then fall back to whatever wg-pki
# clone the operator has under /home.
ISSUER="${WG_PKI_ISSUER:-}"
if [[ -n "$ISSUER" && ! -x "$ISSUER" ]]; then
    echo "wrapper: WG_PKI_ISSUER=$ISSUER is not an executable file" >&2
    exit 2
fi
if [[ -z "$ISSUER" ]]; then
    for cand in \
        /opt/wg-pki/scripts/issue-cis490-client-cert.sh \
        /home/max/wg-pki/scripts/issue-cis490-client-cert.sh \
        /home/max/.env/wg-pki/scripts/issue-cis490-client-cert.sh; do
        if [[ -x "$cand" ]]; then ISSUER="$cand"; break; fi
    done
fi
if [[ -z "$ISSUER" || ! -x "$ISSUER" ]]; then
    echo "wrapper: no issue-cis490-client-cert.sh found; tried /opt/wg-pki, /home/max/wg-pki, /home/max/.env/wg-pki" >&2
    exit 2
fi
OUT_ROOT="/var/lib/wg-pki/issued"

if [[ $# -lt 1 ]]; then
    echo "usage: $0 <host_id> [--out-dir DIR] [--days N]" >&2
    exit 2
fi

HOST_ID="$1"; shift

# HOST_ID becomes a directory name under $OUT_ROOT and this runs as root,
# so refuse anything that could leave that tree ("..", "a/b", ...).
HOST_ID_RE='^[A-Za-z0-9_.-]{1,64}$'
if [[ ! "$HOST_ID" =~ $HOST_ID_RE || -z "${HOST_ID//./}" ]]; then
    echo "wrapper: invalid host_id: $HOST_ID" >&2
    exit 2
fi

# Pull off any --out-dir already passed; we override.
EXTRA=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --out-dir)
            # Without this check a trailing `--out-dir` makes `shift 2`
            # fail and `set -e` kills the script with no message.
            if [[ $# -lt 2 ]]; then
                echo "wrapper: --out-dir requires a value" >&2
                exit 2
            fi
            shift 2 ;;          # drop, we set it ourselves
        --out-dir=*) shift ;;   # same, for the --out-dir=DIR spelling
        *) EXTRA+=("$1"); shift ;;
    esac
done

mkdir -p "$OUT_ROOT/$HOST_ID"
# ${EXTRA[@]+...}: expanding an empty array under `set -u` is an
# "unbound variable" error on bash older than 4.4.
exec env WG_PKI_STATE=/var/lib/wg-pki \
    "$ISSUER" "$HOST_ID" --out-dir "$OUT_ROOT/$HOST_ID" ${EXTRA[@]+"${EXTRA[@]}"}