diff --git a/AGENTS.md b/AGENTS.md index 6549ebb..2b30ad3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -110,7 +110,44 @@ disk, the next wave produces Tier-3 episodes (`meta.exploit.module_name` populated). No orchestrator restart is required, but a restart speeds up the switch. -### Tier-4 (real malware execution) is opt-in, also push-button +### Tier-4 (real malware execution) is mandatory, push-button after one-time Pi setup + +**Real-binary episodes are the project's training target — Tier-4 is +NOT optional.** A lab-host deploy that lands without real samples +fails loudly; mimic-only data does not answer the research question. + +**One-time, on the Pi (operator runs once, ever):** + +```sh +sudo MALWAREBAZAAR_API_KEY=<key> /opt/cis490/scripts/set-malwarebazaar-key.sh +``` + +Free signup at https://bazaar.abuse.ch/. The key lands at +`/etc/cis490/secrets/malwarebazaar.token` (mode 0640, root:cis490). +The bootstrap service's `/v1/secret/malwarebazaar` endpoint then +serves it to every lab host — same trust boundary as the cert +endpoint (WG mesh, iptmonads-gated). + +**Per lab host (auto):** `install-tier-3-4.sh` resolves the MB key +in priority order: + +1. `MALWAREBAZAAR_API_KEY` env var +2. `/opt/cis490/samples/.bazaar.token` (cached from a previous run) +3. `https://bootstrap.wg/v1/secret/malwarebazaar` (auto-distributed + from the Pi) + +If all three fail, the deploy aborts with the exact remediation +command. Once the key resolves, `tools/auto_fetch_samples.py` walks +each manifest family, queries MB by signature, fetches the first +match, sha256-verifies on the way in, lands the binary at +`/opt/cis490/samples/store/`, and rewrites `manifest.toml` +in place. The orchestrator's next selection that picks a sample +with `kind == "real"` runs the real binary via the chunked-upload +path. + +If `auto_fetch_samples.py` lands zero binaries (zero successful MB +queries), `install-tier-3-4.sh` exits non-zero. 
**No silent
+mimic-only fallback** — the project's data depends on real samples.
 
 Set `MALWAREBAZAAR_API_KEY` (free signup at https://bazaar.abuse.ch/)
 before running `install-tier-3-4.sh` and step 5 runs
diff --git a/bootstrap/__main__.py b/bootstrap/__main__.py
index 39df550..0aed191 100644
--- a/bootstrap/__main__.py
+++ b/bootstrap/__main__.py
@@ -33,6 +33,14 @@ def main(argv: list[str] | None = None) -> int:
         default=Path("/home/max/.env/wg-pki/issued"),
         help="Where minted tarballs are cached.",
     )
+    p.add_argument(
+        "--secrets-root",
+        type=Path,
+        default=Path("/etc/cis490/secrets"),
+        help="Directory holding shared secrets distributed to lab hosts. "
+             "Currently used for malwarebazaar.token; provisioned by "
+             "scripts/set-malwarebazaar-key.sh.",
+    )
     p.add_argument("--log-level", default="info")
     args = p.parse_args(argv)
 
@@ -49,6 +57,7 @@ def main(argv: list[str] | None = None) -> int:
     app = make_app(
         issuer_script=args.issuer_script,
         issued_root=args.issued_root,
+        secrets_root=args.secrets_root,
     )
     log.info("listening on %s:%d", args.listen_host, args.listen_port)
     uvicorn.run(
diff --git a/bootstrap/app.py b/bootstrap/app.py
index 35a360e..c927e0a 100644
--- a/bootstrap/app.py
+++ b/bootstrap/app.py
@@ -61,6 +61,7 @@ def make_app(
     *,
     issuer_script: Path,
     issued_root: Path,
+    secrets_root: Path = Path("/etc/cis490/secrets"),
     rate_limit_window_s: float = 5.0,
 ) -> Starlette:
     """Build the Starlette app. Wired by the production launcher in
@@ -139,8 +140,54 @@ def make_app(
             },
         )
 
+    async def get_secret(request: Request) -> Response:
+        """Serve a named secret from `secrets_root`.
+
+        Currently only `malwarebazaar` is allowed — the MB API key Tier-4
+        needs to fetch real malware samples. Same trust boundary as the
+        cert endpoint: anything reaching bootstrap.wg has cleared
+        iptmonads' WG-membership check.
+
+        Returns the bare token as text/plain (200), 404 when the name is
+        not allow-listed or the token file is absent, and 500 when the
+        file cannot be read or is empty.
+        """
+        name: str = request.path_params["name"]
+        # Strict allow-list to keep this from turning into a generic
+        # secrets API.
+        if name != "malwarebazaar":
+            return JSONResponse({"error": "unknown secret"}, status_code=404)
+        path = secrets_root / "malwarebazaar.token"
+        # Read directly rather than exists()-then-read: the token file can
+        # disappear between the two calls (e.g. while a key is re-installed).
+        try:
+            data = path.read_text(encoding="utf-8").strip()
+        except FileNotFoundError:
+            return JSONResponse(
+                {"error": "secret not provisioned",
+                 "hint": "run scripts/set-malwarebazaar-key.sh on the receiver"},
+                status_code=404,
+            )
+        except (OSError, UnicodeDecodeError) as e:
+            return JSONResponse({"error": f"read failed: {e}"}, status_code=500)
+        if not data:
+            return JSONResponse({"error": "empty secret"}, status_code=500)
+        # Prefer the proxy-supplied client address; fall back to the socket peer.
+        src = (
+            request.headers.get("x-real-ip")
+            or (request.headers.get("x-forwarded-for") or "").split(",")[0].strip()
+            or (request.client.host if request.client else "?")
+        )
+        log.info("served secret=%s to src=%s", name, src)
+        return Response(
+            content=data,
+            media_type="text/plain",
+            headers={"Cache-Control": "no-store"},
+        )
+
     routes = [
         Route("/v1/health", health, methods=["GET"]),
         Route("/v1/cert/{host_id}", get_cert, methods=["GET"]),
+        Route("/v1/secret/{name}", get_secret, methods=["GET"]),
     ]
     return Starlette(routes=routes)
diff --git a/etc/cis490-bootstrap.service b/etc/cis490-bootstrap.service
index cf0bd71..905a31b 100644
--- a/etc/cis490-bootstrap.service
+++ b/etc/cis490-bootstrap.service
@@ -16,7 +16,8 @@ ExecStart=/opt/cis490/.venv/bin/python -m bootstrap \
     --listen-host 127.0.0.1 \
     --listen-port 8446 \
     --issuer-script /opt/wg-pki/scripts/issue-cis490-client-cert-wrapper.sh \
-    --issued-root /var/lib/wg-pki/issued
+    --issued-root /var/lib/wg-pki/issued \
+    --secrets-root /etc/cis490/secrets
 Restart=on-failure
 RestartSec=5
 
diff --git a/scripts/install-tier-3-4.sh b/scripts/install-tier-3-4.sh
index cf8434c..a0c4bc4 100755
--- a/scripts/install-tier-3-4.sh
+++ b/scripts/install-tier-3-4.sh
@@ -1,6 +1,8 @@
 #!/usr/bin/env bash
 # Tier-3 + Tier-4 deploy orchestrator. Idempotent. Zero operator
-# interaction in the default path.
+# interaction on the lab host (operator provisions the
+# MalwareBazaar API key ONCE on the Pi via
+# scripts/set-malwarebazaar-key.sh; from there it's auto-distributed).
 #
 # Steps (each idempotent on its own):
 #   1. install-msfrpcd.sh   — auto-install metasploit-framework via
@@ -12,16 +14,22 @@
 #   4. Tier-3 verify        — fire vsftpd_234_backdoor against the
 #                             freshly-fetched VM, confirm session
 #                             lands and an episode is recorded
-#   5. Tier-4 auto-fetch    — if MALWAREBAZAAR_API_KEY is set, run
-#                             tools/auto_fetch_samples.py to pull
-#                             one real binary per sample family and
-#                             update samples/manifest.toml
+#   5. Tier-4 deploy        — fetch MalwareBazaar API key (env >
+#                             local file > bootstrap.wg), then run
+#                             auto_fetch_samples.py to pull one real
+#                             binary per sample family. THIS IS NOT
+#                             OPTIONAL — real-binary episodes are
+#                             the actual training target. Deploy
+#                             fails if zero samples land.
 #
 # Inputs (env, all optional):
 #   SKIP_VERIFY   — set to skip the live Tier-3 fire test
 #   SKIP_BRIDGE   — set to skip bridge setup (limits to non-callback modules)
-#   SKIP_TIER4    — set to skip the Tier-4 auto-fetch even if API key present
-#   MALWAREBAZAAR_API_KEY — opt-in: present means run Tier-4 fetch
+#   SKIP_TIER4    — set to skip Tier-4 deploy entirely (DEPRECATED;
+#                   leaves you with mimic-only data, defeats the project)
+#   MALWAREBAZAAR_API_KEY — preferred input path; otherwise pulled
+#                   from /opt/cis490/samples/.bazaar.token, then
+#                   from https://bootstrap.wg/v1/secret/malwarebazaar
 #
 # Run as root from anywhere on the lab host. Sub-scripts handle their
 # own root checks.
@@ -115,17 +123,74 @@ else
     log "[4/5] SKIP_VERIFY set"
 fi
 
-# --- 5. Tier-4 auto-fetch ----------------------------------------------
-if [[ -z "${SKIP_TIER4:-}" && -n "${MALWAREBAZAAR_API_KEY:-}" ]]; then
-    log "[5/5] Tier-4 auto-fetch (MALWAREBAZAAR_API_KEY set)"
+# --- 5. Tier-4 deploy (MANDATORY) --------------------------------------
+if [[ -n "${SKIP_TIER4:-}" ]]; then
+    log "[5/5] SKIP_TIER4 set — leaving this host on Tier 2/3 mimic-only."
+    log "      This is NOT the recommended configuration; the project's"
+    log "      training target is real-binary episodes."
+else
+    log "[5/5] Tier-4 deploy (real malware fetch — mandatory)"
+
+    # Resolve the MalwareBazaar API key, in priority order:
+    #   1. MALWAREBAZAAR_API_KEY env (preferred for one-shot ops)
+    #   2. /opt/cis490/samples/.bazaar.token (already on disk)
+    #   3. https://bootstrap.wg/v1/secret/malwarebazaar (auto-distributed
+    #      from the Pi after the operator runs set-malwarebazaar-key.sh)
+    MB_KEY="${MALWAREBAZAAR_API_KEY:-}"
+    TOKEN_FILE="$INSTALL_ROOT/samples/.bazaar.token"
+
+    if [[ -z "$MB_KEY" && -f "$TOKEN_FILE" ]]; then
+        MB_KEY="$(tr -d '[:space:]' < "$TOKEN_FILE")"
+        log "using MB key from $TOKEN_FILE"
+    fi
+
+    if [[ -z "$MB_KEY" ]]; then
+        log "no local MB key — fetching from https://bootstrap.wg/v1/secret/malwarebazaar"
+        # Use the same Caddy root the cert auto-fetch trusts.
+        CADDY_ROOT="$INSTALL_ROOT/etc/caddy-root.crt"
+        [[ -f "$CADDY_ROOT" ]] || CADDY_ROOT="$REPO_ROOT/etc/caddy-root.crt"
+        # curl's stderr stays attached (-sS) so the operator sees why the
+        # fetch failed (TLS, DNS, HTTP status) above the remediation text.
+        if MB_KEY="$(curl -fsS \
+            --cacert "$CADDY_ROOT" \
+            --connect-timeout 10 --max-time 30 \
+            https://bootstrap.wg/v1/secret/malwarebazaar)"; then
+            MB_KEY="$(printf '%s' "$MB_KEY" | tr -d '[:space:]')"
+            install -d -o cis490 -g cis490 -m 0750 "$INSTALL_ROOT/samples"
+            install -m 0600 -o cis490 -g cis490 /dev/stdin "$TOKEN_FILE" <<<"$MB_KEY"
+            log "fetched MB key from bootstrap.wg + cached at $TOKEN_FILE"
+        else
+            die "could not fetch MB key from bootstrap.wg. Either:
+  - run on the Pi:    sudo MALWAREBAZAAR_API_KEY=<key> /opt/cis490/scripts/set-malwarebazaar-key.sh
+    (one-time per fleet; lab hosts auto-fetch after that), OR
+  - run on this host: sudo MALWAREBAZAAR_API_KEY=<key> $0
+  Get a free key at https://bazaar.abuse.ch/"
+        fi
+    fi
+    [[ -n "$MB_KEY" ]] || die "MB key still empty after all resolution paths"
+
+    log "running auto_fetch_samples.py — fetches one real binary per family"
     PY="$INSTALL_ROOT/.venv/bin/python"
     [[ -x "$PY" ]] || PY="$(command -v python3)"
-    sudo -E -u cis490 "$PY" "$INSTALL_ROOT/tools/auto_fetch_samples.py" || \
-        log "Tier-4 auto-fetch failed (non-fatal) — Tier 3 still active"
-elif [[ -z "${MALWAREBAZAAR_API_KEY:-}" ]]; then
-    log "[5/5] Tier-4 skipped — set MALWAREBAZAAR_API_KEY to enable real-binary fetch"
-else
-    log "[5/5] SKIP_TIER4 set"
+    # Hand the key to sudo through the environment (-E) rather than as a
+    # VAR=value argument: sudo's argv is readable by every local user.
+    # The log lives in root-owned /var/log instead of a fixed /tmp path.
+    TIER4_LOG=/var/log/cis490-tier4-deploy.log
+    if ! MALWAREBAZAAR_API_KEY="$MB_KEY" sudo -E -u cis490 "$PY" \
+        "$INSTALL_ROOT/tools/auto_fetch_samples.py" \
+        > "$TIER4_LOG" 2>&1; then
+        log "Tier-4 fetch failed — last 30 lines of $TIER4_LOG:"
+        tail -30 "$TIER4_LOG" >&2 || true
+        die "Tier-4 deploy failed; without real binaries this host produces only mimics"
+    fi
+    # Counts every regular file in store/, including ones from earlier runs.
+    REAL_COUNT="$(find "$INSTALL_ROOT/samples/store" -maxdepth 1 -type f 2>/dev/null | wc -l)"
+    if [[ "$REAL_COUNT" -lt 1 ]]; then
+        log "auto_fetch_samples.py exited 0 but samples/store/ is empty — see $TIER4_LOG"
+        tail -30 "$TIER4_LOG" >&2 || true
+        die "Tier-4 deploy failed: no real binaries staged"
+    fi
+    log "Tier-4 ✓ ($REAL_COUNT real binaries staged in $INSTALL_ROOT/samples/store/)"
 fi
 
 log ""
diff --git a/scripts/set-malwarebazaar-key.sh b/scripts/set-malwarebazaar-key.sh
new file mode 100755
index 0000000..b27ad1a
--- /dev/null
+++ b/scripts/set-malwarebazaar-key.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# One-time operator step on the receiver Pi.
+#
+# Provisions the MalwareBazaar API key at /etc/cis490/secrets/malwarebazaar.token
+# with mode 0640, owned by root:cis490 (the bootstrap service runs as root and
+# reads this file directly; the cis490 group gets read-only access, so key
+# rotation still requires root — re-run this script).
+#
+# Once provisioned, every lab host that runs install-tier-3-4.sh fetches the
+# key from https://bootstrap.wg/v1/secret/malwarebazaar (over WG, gated by
+# iptmonads at L4) — operator does NOT need to repeat this on each lab host.
+#
+# Usage:
+#   sudo MALWAREBAZAAR_API_KEY=<key> /opt/cis490/scripts/set-malwarebazaar-key.sh
+# or:
+#   printf '%s' "$key" | sudo /opt/cis490/scripts/set-malwarebazaar-key.sh
+
+set -euo pipefail
+
+SECRETS_DIR="${SECRETS_DIR:-/etc/cis490/secrets}"
+KEY_FILE="$SECRETS_DIR/malwarebazaar.token"
+
+log() { printf '[set-malwarebazaar-key] %s\n' "$*" >&2; }
+die() { log "FATAL: $*"; exit 1; }
+
+[[ $EUID -eq 0 ]] || die "must run as root"
+
+# Accept the key via env var first, stdin second.
+KEY="${MALWAREBAZAAR_API_KEY:-}"
+if [[ -z "$KEY" ]] && [[ ! -t 0 ]]; then
+    KEY="$(cat)"
+fi
+KEY="$(printf '%s' "$KEY" | tr -d '[:space:]')"
+[[ -n "$KEY" ]] || die "no key provided. Set MALWAREBAZAAR_API_KEY or pipe via stdin."
+
+# Free signup at https://bazaar.abuse.ch/ — the key is a 64-char
+# alphanumeric string. Loose sanity check.
+[[ ${#KEY} -ge 32 ]] || die "key looks too short (${#KEY} chars). Get a real one from https://bazaar.abuse.ch/"
+
+if ! id -u cis490 >/dev/null 2>&1; then
+    die "cis490 user not present — run install-receiver.sh first"
+fi
+
+install -d -o root -g cis490 -m 0750 "$SECRETS_DIR"
+install -m 0640 -o root -g cis490 /dev/stdin "$KEY_FILE" <<<"$KEY"
+
+log "key installed at $KEY_FILE (${#KEY} chars)"
+log ""
+log "Next step: each lab host's install-tier-3-4.sh will now fetch it"
+log "automatically from https://bootstrap.wg/v1/secret/malwarebazaar"
+log "during deploy. To force a re-fetch on an already-deployed host:"
+log "  ssh <lab-host> sudo rm /opt/cis490/samples/.bazaar.token"
+log "  ssh <lab-host> sudo /opt/cis490/scripts/install-tier-3-4.sh"
+log ""
+log "If the bootstrap service was running already, no restart needed —"
+log "the secret endpoint reads the file fresh on each request."
diff --git a/tests/test_bootstrap_secrets.py b/tests/test_bootstrap_secrets.py
new file mode 100644
index 0000000..586acef
--- /dev/null
+++ b/tests/test_bootstrap_secrets.py
@@ -0,0 +1,80 @@
+"""Tests for the bootstrap.wg /v1/secret/<name> endpoint.
+
+Tier 4 needs the MalwareBazaar API key on each lab host. We
+distribute the key from the Pi via this endpoint instead of forcing
+the operator to copy it manually to every host. Trust boundary is
+identical to /v1/cert/<host_id>: a caller that reaches bootstrap.wg
+is already a WG-mesh peer (iptmonads gate).
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from starlette.testclient import TestClient
+
+from bootstrap.app import make_app
+
+
+@pytest.fixture
+def bootstrap_app(tmp_path: Path):
+    issued_root = tmp_path / "issued"
+    issued_root.mkdir()
+    secrets_root = tmp_path / "secrets"
+    secrets_root.mkdir()
+    # Issuer script doesn't matter for these tests — make a no-op stub
+    # so make_app doesn't barf on a missing path.
+    stub = tmp_path / "stub.sh"
+    stub.write_text("#!/bin/sh\nexit 0\n")
+    stub.chmod(0o755)
+    app = make_app(
+        issuer_script=stub,
+        issued_root=issued_root,
+        secrets_root=secrets_root,
+    )
+    return app, secrets_root
+
+
+def test_secret_404_when_not_provisioned(bootstrap_app):
+    app, _ = bootstrap_app
+    with TestClient(app) as client:
+        r = client.get("/v1/secret/malwarebazaar")
+    assert r.status_code == 404
+    assert "secret not provisioned" in r.json()["error"]
+
+
+def test_secret_returns_provisioned_token(bootstrap_app):
+    app, secrets_root = bootstrap_app
+    token = "a" * 64
+    (secrets_root / "malwarebazaar.token").write_text(token + "\n")
+    with TestClient(app) as client:
+        r = client.get("/v1/secret/malwarebazaar")
+    assert r.status_code == 200
+    # Response is the bare token, no JSON wrapping (lab-host curls
+    # this and pipes straight into the install flow).
+    assert r.text.strip() == token
+    # Don't cache the secret in any intermediate proxy.
+    assert r.headers.get("cache-control") == "no-store"
+
+
+def test_unknown_secret_name_404(bootstrap_app):
+    app, secrets_root = bootstrap_app
+    # Even if a file with that name existed on disk, the route's
+    # allow-list rejects anything but `malwarebazaar`.
+    (secrets_root / "anything-else.token").write_text("x")
+    with TestClient(app) as client:
+        r = client.get("/v1/secret/anything-else")
+    assert r.status_code == 404
+    assert "unknown secret" in r.json()["error"]
+
+
+def test_empty_secret_500(bootstrap_app):
+    """An empty token file is operator error — fail loudly so the
+    lab-host install doesn't end up calling MB with no key."""
+    app, secrets_root = bootstrap_app
+    (secrets_root / "malwarebazaar.token").write_text("")
+    with TestClient(app) as client:
+        r = client.get("/v1/secret/malwarebazaar")
+    assert r.status_code == 500
+    assert "empty" in r.json()["error"]
diff --git a/tools/auto_fetch_samples.py b/tools/auto_fetch_samples.py
index 9ab0b9e..4805879 100644
--- a/tools/auto_fetch_samples.py
+++ b/tools/auto_fetch_samples.py
@@ -192,7 +192,13 @@ def main(argv: list[str] | None = None) -> int:
             failed += 1
 
     log.info("done: fetched=%d skipped=%d failed=%d", fetched, skipped, failed)
-    return 0 if (failed == 0 or fetched > 0) else 1
+    # Tier 4 is mandatory — exit non-zero unless at least one real
+    # binary landed or at least one entry was skipped as already real.
+    # The deploy script depends on this exit semantic.
+    if fetched == 0 and skipped == 0:
+        log.error("zero samples fetched and zero already-real — Tier 4 not viable")
+        return 1
+    return 0
 
 
 if __name__ == "__main__":