From a93a3ff221a165c531cd2db4ac58147eff60b3a8 Mon Sep 17 00:00:00 2001 From: max Date: Thu, 30 Apr 2026 01:30:29 -0500 Subject: [PATCH] bootstrap: auto-issue mTLS leaves to enrolled lab hosts (closes #9, refs #3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a pull-based cert distribution path so install-lab-host.sh can fetch its own leaf cert without operator intervention. Removes the ssh-from-Pi requirement that blocked elliott-lab. How the chicken-and-egg gets solved: a freshly wg-enrolled lab host already has WG access (gatekept by iptmonads at L4) and trusts the Caddy local CA (bundled in this repo at etc/caddy-root.crt). It makes a single TLS call to https://bootstrap.wg/v1/cert/{host_id} — no mTLS — gets back a tar of {ca.crt, leaf.pem, leaf.key}, extracts to /etc/cis490/certs/, and the shipper unblocks. Trust boundary is "reached :443 over WG"; no operator action needed. bootstrap/ app.py Starlette: GET /v1/cert/{host_id}, GET /v1/health. Validates host_id charset, rate-limits per source IP, logs every mint with the X-Real-IP Caddy injects. __main__.py uvicorn launcher; runs as root because the wg-pki CA private key is root-only. etc/cis490-bootstrap.service systemd unit on 127.0.0.1:8446 with ProtectSystem=strict + narrow ReadWritePaths=/var/lib/wg-pki. ProtectHome=no because systemd's ProtectHome=yes and ProtectHome=tmpfs modes hide /home contents (the issuer script the wrapper exec's lives there); ProtectSystem=strict already keeps /home read-only. scripts/issue-cis490-client-cert-wrapper.sh Adapter the bootstrap service shells out to. Resolves the actual wg-pki issuer script across the three plausible install layouts (/opt/wg-pki, /home/max/wg-pki, /home/max/.env/wg-pki) so a single copy of the unit file works on any operator's box. Forces --out-dir to /var/lib/wg-pki/issued so writes stay inside the service's narrow ReadWritePaths. 
scripts/install-lab-host.sh After scaffolding lab-host.toml, if /etc/cis490/certs/lab-host.pem is absent, curls bootstrap.wg with --cacert etc/caddy-root.crt (no chicken-and-egg), extracts, chowns/chmods. Skips silently if bootstrap.wg is unreachable so manual hand-carry remains possible. scripts/install-receiver.sh Drops cis490-bootstrap.service alongside cis490-receiver and prints both as "enable --now" candidates. cis490-bootstrap is the thing that makes lab hosts self-provisioning. etc/caddy-root.crt Bundled copy of wg-pki's published Caddy local CA root, so the bootstrap fetch can verify TLS without depending on a wg-pki clone that may or may not be on the lab host yet. Verified live on the Pi: $ curl --cacert etc/caddy-root.crt https://bootstrap.wg/v1/cert/elliott-lab -o /tmp/x.tar HTTP 200 size=10240 $ tar tf /tmp/x.tar ca.crt elliott-lab.key elliott-lab.pem $ openssl verify -CAfile … elliott-lab.pem /tmp/.../elliott-lab.pem: OK $ openssl x509 -subject … -noout subject=CN=elliott-lab Co-Authored-By: Claude Opus 4.7 (1M context) --- bootstrap/__init__.py | 0 bootstrap/__main__.py | 65 +++++++++ bootstrap/app.py | 146 ++++++++++++++++++++ etc/caddy-root.crt | 11 ++ etc/cis490-bootstrap.service | 44 ++++++ scripts/install-lab-host.sh | 38 ++++- scripts/install-receiver.sh | 10 +- scripts/issue-cis490-client-cert-wrapper.sh | 50 +++++++ 8 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 bootstrap/__init__.py create mode 100644 bootstrap/__main__.py create mode 100644 bootstrap/app.py create mode 100644 etc/caddy-root.crt create mode 100644 etc/cis490-bootstrap.service create mode 100755 scripts/issue-cis490-client-cert-wrapper.sh diff --git a/bootstrap/__init__.py b/bootstrap/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstrap/__main__.py b/bootstrap/__main__.py new file mode 100644 index 0000000..39df550 --- /dev/null +++ b/bootstrap/__main__.py @@ -0,0 +1,65 @@ +"""``cis490-bootstrap`` launcher. 
+
+Runs as root (needs CA private key access). Listens on 127.0.0.1:8446
+behind Caddy's ``bootstrap.wg`` site — Caddy terminates TLS, this
+service speaks plain HTTP on loopback only.
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+import uvicorn
+
+from bootstrap.app import make_app
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Serve the bootstrap app; return 0, or 2 if the issuer script is missing."""
+    p = argparse.ArgumentParser(prog="cis490-bootstrap")
+    p.add_argument("--listen-host", default="127.0.0.1")
+    p.add_argument("--listen-port", type=int, default=8446)
+    p.add_argument(
+        "--issuer-script",
+        type=Path,
+        default=Path("/home/max/.env/wg-pki/scripts/issue-cis490-client-cert.sh"),
+        help="Path to the wg-pki leaf-cert mint script.",
+    )
+    p.add_argument(
+        "--issued-root",
+        type=Path,
+        default=Path("/home/max/.env/wg-pki/issued"),
+        help="Where minted tarballs are cached.",
+    )
+    p.add_argument("--log-level", default="info")
+    args = p.parse_args(argv)
+
+    logging.basicConfig(
+        level=getattr(logging, args.log_level.upper(), logging.INFO),
+        format="%(asctime)s %(levelname)s %(name)s %(message)s",
+    )
+    log = logging.getLogger("cis490.bootstrap.main")
+
+    if not args.issuer_script.exists():
+        log.error("issuer script missing: %s", args.issuer_script)
+        return 2
+
+    app = make_app(issuer_script=args.issuer_script, issued_root=args.issued_root)
+    log.info("listening on %s:%d", args.listen_host, args.listen_port)
+    uvicorn.run(
+        app,
+        host=args.listen_host,
+        port=args.listen_port,
+        # uvicorn resolves level names via a lowercase-keyed table, so
+        # "--log-level INFO" must be lowercased here or uvicorn rejects it.
+        log_level=args.log_level.lower(),
+        access_log=True,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/bootstrap/app.py b/bootstrap/app.py
new file mode 100644
index 0000000..35a360e
--- /dev/null
+++ b/bootstrap/app.py
@@ -0,0 +1,146 @@
+"""``cis490-bootstrap`` — auto-issue mTLS leaf certs to enrolled lab hosts.
+
+This is the chicken-and-egg fix for first-time lab-host setup.
A +freshly wg-enrolled device has WG access (and trusts the wg-pki CA) +but has no client cert yet, so it can't authenticate to the +mTLS-protected ``collector.wg``. This service exposes a *plain-TLS* +(no client-auth) endpoint that the lab host can call once during +``install-lab-host.sh`` to retrieve its leaf cert tarball. + +Trust boundary: anything that reaches ``bootstrap.wg`` has already +passed iptmonads' WG-membership check at L4. No further +authentication is required for the bootstrap pull — by the time a +caller can connect at all they're a peer the operator authorized. + +The privilege boundary, on the other hand, is real: minting certs +requires the wg-pki CA private key (root-only at +``/var/lib/wg-pki/cis490-client-ca/ca.key``). This service therefore +runs as root in a tight sandbox (see ``etc/cis490-bootstrap.service``) +and shells out to ``issue-cis490-client-cert.sh`` for each mint. + +Endpoints: + + GET /v1/cert/{host_id} — return tarball of {ca.crt, leaf.pem, leaf.key} + for ``host_id``. Cached — successive calls + return the same bytes. + GET /v1/health — liveness probe (no auth needed). + +Each mint is logged with the source IP (after Caddy's X-Real-IP +forward) so the operator has an audit trail of which devices have +fetched which certs. +""" + +from __future__ import annotations + +import logging +import re +import subprocess +import time +from pathlib import Path +from typing import Awaitable, Callable + +from starlette.applications import Starlette +from starlette.requests import Request +from starlette.responses import FileResponse, JSONResponse, Response +from starlette.routing import Route + + +log = logging.getLogger("cis490.bootstrap") + + +# Sane host_id charset — same rules the receiver enforces, mirrored +# here so mint requests can't smuggle path traversal in. 
+# First character must be alphanumeric or "_": that rules out "." and
+# ".." (which would resolve outside issued_root) and a leading "-"
+# (which the issuer script would parse as an option).
+_HOST_ID_RE = re.compile(r"[A-Za-z0-9_][A-Za-z0-9_.-]{0,63}")
+
+
+def _is_valid_host_id(s: str) -> bool:
+    """Return True if ``s`` is safe as a path component and as an argv word."""
+    # fullmatch, not match + "$": "$" also matches before a trailing "\n".
+    return _HOST_ID_RE.fullmatch(s) is not None
+
+
+def make_app(
+    *,
+    issuer_script: Path,
+    issued_root: Path,
+    rate_limit_window_s: float = 5.0,
+) -> Starlette:
+    """Build the Starlette app.
+
+    Wired by the production launcher in ``bootstrap/__main__.py``; tests
+    can pass synthetic paths.
+
+    Args:
+        issuer_script: Executable invoked as
+            ``issuer_script HOST_ID --out-dir ISSUED_ROOT/HOST_ID``; the
+            handler then expects ``HOST_ID.tar`` in that directory.
+        issued_root: Cache directory for minted tarballs; created if
+            missing.
+        rate_limit_window_s: Minimum number of seconds between two cert
+            requests from the same source address.
+
+    Returns:
+        App serving ``GET /v1/health`` and ``GET /v1/cert/{host_id}``.
+    """
+    issued_root.mkdir(parents=True, exist_ok=True)
+
+    # Coarse per-IP rate limiter to make a casual scan annoying. Not
+    # a real defense — the WG mesh is the actual perimeter.
+    last_request: dict[str, float] = {}
+    # Above this many tracked sources, expired entries are pruned.
+    max_tracked_sources = 1024
+
+    async def health(request: Request) -> Response:
+        """Liveness probe: always answers ``{"status": "ok"}``."""
+        return JSONResponse({"status": "ok"})
+
+    async def get_cert(request: Request) -> Response:
+        """Return the cached tarball for ``host_id``, minting it on first use.
+
+        Responds 400 for a bad host_id, 429 when rate limited and 500
+        when the issuer script fails or produces no tarball.
+        """
+        host_id: str = request.path_params["host_id"]
+        if not _is_valid_host_id(host_id):
+            return JSONResponse({"error": "bad host_id"}, status_code=400)
+
+        # Caddy forwards the original WG-side IP via X-Real-IP /
+        # X-Forwarded-For; fall back to the direct peer if running
+        # without Caddy in front (tests).
+        src = (
+            request.headers.get("x-real-ip")
+            or (request.headers.get("x-forwarded-for") or "").split(",")[0].strip()
+            or (request.client.host if request.client else "?")
+        )
+
+        now = time.monotonic()
+        prev = last_request.get(src)
+        # ``prev is None`` means "never seen": time.monotonic() has an
+        # undefined origin, so a 0.0 default could throttle a first request.
+        if prev is not None and (now - prev) < rate_limit_window_s:
+            return JSONResponse(
+                {"error": "rate limited; back off"},
+                status_code=429,
+            )
+        if len(last_request) >= max_tracked_sources:
+            # Expired entries no longer affect any decision; drop them so
+            # the table cannot grow without bound.
+            for ip in [
+                ip for ip, seen in last_request.items()
+                if (now - seen) >= rate_limit_window_s
+            ]:
+                del last_request[ip]
+        last_request[src] = now
+
+        tar_path = issued_root / host_id / f"{host_id}.tar"
+        if not tar_path.exists():
+            log.info("minting cert for host_id=%s src=%s", host_id, src)
+            # NOTE(review): subprocess.run blocks the event loop for up to
+            # 30 s, stalling /v1/health and other requests meanwhile. It
+            # also serialises mints, so moving it off-loop needs a lock.
+            try:
+                subprocess.run(
+                    [
+                        str(issuer_script), host_id,
+                        "--out-dir", str(issued_root / host_id),
+                    ],
+                    check=True,
+                    capture_output=True,
+                    text=True,
+                    timeout=30,
+                )
+            except subprocess.CalledProcessError as e:
+                log.error("issue script failed for %s: rc=%d stderr=%s",
+                          host_id, e.returncode, e.stderr[:500])
+                return JSONResponse(
+                    {"error": "mint failed", "detail": e.stderr[:500]},
+                    status_code=500,
+                )
+            except (OSError, subprocess.TimeoutExpired) as e:
+                log.exception("issue script transport error for %s", host_id)
+                return JSONResponse(
+                    {"error": f"transport: {e}"},
+                    status_code=500,
+                )
+        else:
+            log.info("cache hit for host_id=%s src=%s", host_id, src)
+
+        # The issuer can exit 0 without leaving the tarball where we look.
+        if not tar_path.exists():
+            return JSONResponse({"error": "tarball not produced"}, status_code=500)
+        return FileResponse(
+            tar_path,
+            media_type="application/x-tar",
+            filename=f"{host_id}.tar",
+            headers={
+                "X-Cis490-Host-Id": host_id,
+                "X-Cis490-Cert-Source-IP": src,
+            },
+        )
+
+    routes = [
+        Route("/v1/health", health, methods=["GET"]),
+        Route("/v1/cert/{host_id}", get_cert, methods=["GET"]),
+    ]
+    return Starlette(routes=routes)
diff --git a/etc/caddy-root.crt b/etc/caddy-root.crt
new file mode 100644
index 0000000..bffc91b
--- /dev/null
+++ b/etc/caddy-root.crt
@@ -0,0 +1,11 @@
+-----BEGIN CERTIFICATE-----
+MIIBpDCCAUqgAwIBAgIRAP15YNZS/guq4ES7RfuBBQQwCgYIKoZIzj0EAwIwMDEu
+MCwGA1UEAxMlQ2FkZHkgTG9jYWwgQXV0aG9yaXR5IC0gMjAyNiBFQ0MgUm9vdDAe
+Fw0yNjA0MjYxMzE5NTZaFw0zNjAzMDQxMzE5NTZaMDAxLjAsBgNVBAMTJUNhZGR5 +IExvY2FsIEF1dGhvcml0eSAtIDIwMjYgRUNDIFJvb3QwWTATBgcqhkjOPQIBBggq +hkjOPQMBBwNCAASjU+sJ+rLPPtTK5t7MsKa6/WDknumPOgxy7uGwGATkd65cHTjz +zTH6+0+uJ7LPZFTJoPSB5WVHrEA0veY8AxH5o0UwQzAOBgNVHQ8BAf8EBAMCAQYw +EgYDVR0TAQH/BAgwBgEB/wIBATAdBgNVHQ4EFgQU8EarYtjVc2EvpYE6OPhDQlYB +docwCgYIKoZIzj0EAwIDSAAwRQIhANxALV9oKSAC4JEB/w1EctnzMfzLyueBpGoB +7p5I07LRAiAKQuhNMeTDSK3Qql+IjunH8UPidETNXfyInwMnbzgAaQ== +-----END CERTIFICATE----- diff --git a/etc/cis490-bootstrap.service b/etc/cis490-bootstrap.service new file mode 100644 index 0000000..cf0bd71 --- /dev/null +++ b/etc/cis490-bootstrap.service @@ -0,0 +1,44 @@ +[Unit] +Description=CIS490 mTLS bootstrap endpoint (auto-issue client certs to enrolled lab hosts) +Documentation=https://maxgit.wg/spectral/CIS490 +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +# Runs as root because the wg-pki CA private key is root-only. The +# service shells out to issue-cis490-client-cert.sh per mint and +# never touches anything else under /var/lib. +User=root +Group=root +WorkingDirectory=/opt/cis490 +ExecStart=/opt/cis490/.venv/bin/python -m bootstrap \ + --listen-host 127.0.0.1 \ + --listen-port 8446 \ + --issuer-script /opt/wg-pki/scripts/issue-cis490-client-cert-wrapper.sh \ + --issued-root /var/lib/wg-pki/issued +Restart=on-failure +RestartSec=5 + +# Hardening — narrower than receiver because this binary's only job +# is to call openssl + tar via the issuer script, then serve files. +NoNewPrivileges=true +PrivateTmp=true +ProtectSystem=strict +# /home/max/.env/wg-pki/scripts/ holds the issuer script the wrapper +# exec's. ProtectHome=yes and =tmpfs *hide* /home contents (only +# =read-only keeps them visible) — so we leave /home accessible. +# ProtectSystem=strict still keeps everything outside +# /var/lib/wg-pki write-protected. 
+ProtectHome=no
+ReadWritePaths=/var/lib/wg-pki
+ProtectKernelTunables=true
+ProtectKernelModules=true
+ProtectControlGroups=true
+LockPersonality=true
+RestrictNamespaces=true
+RestrictRealtime=true
+SystemCallArchitectures=native
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/install-lab-host.sh b/scripts/install-lab-host.sh
index 6690c97..5ebb6ad 100755
--- a/scripts/install-lab-host.sh
+++ b/scripts/install-lab-host.sh
@@ -112,7 +112,50 @@ FLEET_HOST_ID=$DEFAULT_HOST_ID
 EOF
 fi
 
-# --- 7. baseline VM image + cidata (best-effort) -----------------------
+# --- 7. mTLS leaf cert (auto-fetch via bootstrap.wg) -------------------
+# Pull our leaf cert from the Pi's bootstrap endpoint if it isn't
+# already on disk. Trust boundary: "reached bootstrap.wg over WG"
+# (iptmonads already filters non-peers from 443). Caddy's TLS cert
+# is verified against the bundled etc/caddy-root.crt — no chicken-
+# and-egg.
+# "|| true" and "sed -n ... p": a missing or unquoted host_id must yield
+# an empty HOST_ID (the "skipping" branch below) instead of aborting a
+# script run under errexit/pipefail or leaking the raw line into the URL.
+HOST_ID="$(grep -E '^host_id\s*=' "$ETC_ROOT/lab-host.toml" 2>/dev/null \
+    | head -1 | sed -nE 's/^host_id\s*=\s*"([^"]+)".*/\1/p' || true)"
+if [[ -z "$HOST_ID" || "$HOST_ID" == "REPLACE_ME" ]]; then
+    log "skipping cert auto-fetch: host_id not set in $ETC_ROOT/lab-host.toml"
+elif [[ ! -f "$ETC_ROOT/certs/lab-host.pem" ]]; then
+    log "fetching leaf cert from https://bootstrap.wg/v1/cert/$HOST_ID"
+    install -d -m 0755 -o root -g "$SERVICE_USER" "$ETC_ROOT/certs"
+    # Stage in a private (0700) mktemp dir: a fixed /tmp/...-$$.tar name
+    # can be pre-created as a symlink by a local user, and the key must
+    # not sit in the 0755 certs dir before its owner and mode are set.
+    STAGE="$(mktemp -d "${TMPDIR:-/tmp}/cis490-bootstrap.XXXXXX")"
+    TAR="$STAGE/bundle.tar"
+    if curl -fsS --cacert "$REPO_ROOT/etc/caddy-root.crt" \
+            --connect-timeout 10 --max-time 60 \
+            "https://bootstrap.wg/v1/cert/$HOST_ID" -o "$TAR"; then
+        # Extract only the three members the bundle is expected to hold.
+        tar -C "$STAGE" -xf "$TAR" ca.crt "$HOST_ID.pem" "$HOST_ID.key"
+        install -m 0644 -o root -g "$SERVICE_USER" \
+            "$STAGE/ca.crt" "$ETC_ROOT/certs/wg-ca.pem"
+        install -m 0640 -o root -g "$SERVICE_USER" \
+            "$STAGE/$HOST_ID.key" "$ETC_ROOT/certs/lab-host.key"
+        # lab-host.pem goes last: its presence is what skips re-fetching.
+        install -m 0644 -o root -g "$SERVICE_USER" \
+            "$STAGE/$HOST_ID.pem" "$ETC_ROOT/certs/lab-host.pem"
+        log "leaf cert installed for host_id=$HOST_ID"
+    else
+        log "WARN: bootstrap.wg fetch failed — make sure /etc/hosts maps it"
+        log " to 10.100.0.1 and that wg0 is up. cert delivery skipped."
+    fi
+    rm -rf "$STAGE"
+else
+    log "$ETC_ROOT/certs/lab-host.pem present; skipping auto-fetch"
+fi
+
+# --- 8. baseline VM image + cidata (best-effort) -----------------------
 ALPINE_IMG="$DATA_ROOT/vm/images/alpine-baseline.qcow2"
 CIDATA_ISO="$DATA_ROOT/vm/images/cidata.iso"
 if [[ ! -f "$ALPINE_IMG" ]]; then
diff --git a/scripts/install-receiver.sh b/scripts/install-receiver.sh
index 12fd6ec..515ce26 100755
--- a/scripts/install-receiver.sh
+++ b/scripts/install-receiver.sh
@@ -75,9 +75,11 @@ else
 fi
 
 # --- 4. systemd --------------------------------------------------------
-log "installing systemd unit"
+log "installing systemd units (receiver + bootstrap)"
 install -m 0644 "$REPO_ROOT/etc/cis490-receiver.service" \
     /etc/systemd/system/cis490-receiver.service
+install -m 0644 "$REPO_ROOT/etc/cis490-bootstrap.service" \
+    /etc/systemd/system/cis490-bootstrap.service
 systemctl daemon-reload
 
 # --- 5. config template (only on first install) -----------------------
@@ -97,8 +99,12 @@ if [[ !
-f "$ETC_ROOT/receiver.toml" ]]; then
     log " (mTLS to clients is enforced by the wg-pki CA bundle on"
     log " the receiver side once leaf certs are issued.)"
     log " 3. Open the WG-side port via iptmonads."
-    log " 4. systemctl enable --now cis490-receiver"
+    log " 4. systemctl enable --now cis490-receiver cis490-bootstrap"
     log " 5. From a lab host: cis490-shipper --ping"
+    log ""
+    log "Bootstrap endpoint (cis490-bootstrap on :8446 + Caddy bootstrap.wg)"
+    log "lets enrolled lab hosts auto-fetch their leaf certs. Without it,"
+    log "operators have to hand-carry tarballs via deploy-cis490-cert.sh."
 else
     log "$ETC_ROOT/receiver.toml exists; leaving in place"
 fi
diff --git a/scripts/issue-cis490-client-cert-wrapper.sh b/scripts/issue-cis490-client-cert-wrapper.sh
new file mode 100755
index 0000000..a4fe68e
--- /dev/null
+++ b/scripts/issue-cis490-client-cert-wrapper.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# Wrapper that re-points the wg-pki issuer script's relative-path
+# assumption (PWD-derived publish dir, $REPO_ROOT/issued/) to the
+# absolute /var/lib/wg-pki/issued/ that the bootstrap service uses.
+#
+# wg-pki ships the actual issuer at
+# /home/max/.env/wg-pki/scripts/issue-cis490-client-cert.sh, which
+# computes paths relative to its own location. This wrapper sets
+# WG_PKI_STATE so the CA key is found in /var/lib/wg-pki, and forces
+# --out-dir to a path under /var/lib/wg-pki, the only persistent tree
+# cis490-bootstrap may write to (ProtectSystem=strict +
+# ReadWritePaths=/var/lib/wg-pki).
+
+set -euo pipefail
+
+# Resolve issuer path: prefer the install-time copy at /opt/wg-pki/,
+# fall back to whatever wg-pki clone the operator has under /home.
+ISSUER="${WG_PKI_ISSUER:-}"
+if [[ -z "$ISSUER" ]]; then
+    for cand in \
+        /opt/wg-pki/scripts/issue-cis490-client-cert.sh \
+        /home/max/wg-pki/scripts/issue-cis490-client-cert.sh \
+        /home/max/.env/wg-pki/scripts/issue-cis490-client-cert.sh; do
+        if [[ -x "$cand" ]]; then ISSUER="$cand"; break; fi
+    done
+fi
+if [[ -z "$ISSUER" || ! -x "$ISSUER" ]]; then
+    echo "wrapper: no issue-cis490-client-cert.sh found; tried /opt/wg-pki, /home/max/wg-pki, /home/max/.env/wg-pki" >&2
+    exit 2
+fi
+OUT_ROOT="/var/lib/wg-pki/issued"
+
+if [[ $# -lt 1 ]]; then
+    echo "usage: $0 HOST_ID [--out-dir DIR] [--days N]" >&2
+    exit 2
+fi
+
+HOST_ID="$1"; shift
+
+# HOST_ID becomes a directory under $OUT_ROOT and an argv word for the
+# issuer: refuse ".", "..", anything containing "/" and a leading "-".
+HOST_ID_RE='^[A-Za-z0-9_][A-Za-z0-9_.-]{0,63}$'
+if [[ ! "$HOST_ID" =~ $HOST_ID_RE ]]; then
+    echo "wrapper: invalid host_id: $HOST_ID" >&2
+    exit 2
+fi
+
+# Pull off any --out-dir already passed; we override.
+EXTRA=()
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --out-dir)
+            # Drop the flag and its value. A bare trailing --out-dir has
+            # no value; "shift 2" would fail there and abort via errexit.
+            shift
+            if [[ $# -gt 0 ]]; then shift; fi
+            ;;
+        *) EXTRA+=("$1"); shift ;;
+    esac
+done
+
+mkdir -p "$OUT_ROOT/$HOST_ID"
+# ${EXTRA[@]+...}: expanding an empty array trips "set -u" on bash < 4.4.
+exec env WG_PKI_STATE=/var/lib/wg-pki \
+    "$ISSUER" "$HOST_ID" --out-dir "$OUT_ROOT/$HOST_ID" \
+    ${EXTRA[@]+"${EXTRA[@]}"}