From 4ab54772263af2b7f98c06ff53ae3a38ad55a0ab Mon Sep 17 00:00:00 2001 From: Max Gorog Date: Sun, 3 May 2026 17:05:25 -0500 Subject: [PATCH] =?UTF-8?q?PIPELINE=20=C2=A75=20step=201:=20fix=20four=20r?= =?UTF-8?q?oot-cause=20defects?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Diagnoses + fixes for the silent-collector / never-lands-session failures that the 200-episode quality probe surfaced (§3 evidence). All four address the producer; no compensating layers added. perf collector (rows_perf=0 on 100% of episodes): - perf stat -j writes to stderr by default with -p; we read stdout. Add --log-fd 1 so JSON reaches stdout where the parser sees it. - Event names come back annotated with the privilege scope perf actually measured ("cycles:u" under perf_event_paranoid=2). Strip the suffix so _build_row's plain-name lookups hit. Without this every metric was None even when perf reported real numbers. - tests/test_collectors_emit.py covers the regression with a real busy-loop fixture; emit-test discipline per §4.4. guest-agent collector (rows_guest=0 on 100% of episodes): - Alpine cloud image doesn't ship python3, so the in-guest agent's `#!/usr/bin/env python3` shebang silently fails. Add packages: [python3] to cidata user-data so cloud-init installs it before the OpenRC service starts. - Guest agent now exits nonzero (was: silent stdout fallback) when /dev/virtio-ports/cis490.guest.agent is missing, so OpenRC reports the failure to /var/log/cis490-agent.log instead of the bytes vanishing into the void. Refs §1. - Host-side collector emits guest_agent_connected / guest_agent_first_byte / guest_agent_silent_window into the orchestrator's events.jsonl. Future episodes show the in-guest failure mode per-episode instead of inferring from rows_guest=0. 
k-gamingcom missing qmp/netflow/pcap (also affected elliott on Tier-3 episodes — was misclassified as host divergence): - tools/run_tier3_demo.py was building EpisodeConfig WITHOUT qmp_socket / guest_agent_socket / bridge_iface — even though launch_target.sh creates the underlying chardevs and BRIDGE supplies the iface. tools/run_real_vm_demo.py wires them correctly; Tier-3 had a copy-paste gap. - tests/test_collectors_emit.py adds a source-grep regression so the wiring stays honest. samba_usermap_script never lands a session (0/67 in §3 probe): - Bind handler default WfsDelay (~5s) gives up before bind_perl on Metasploitable2 has finished forking + binding LPORT under SLIRP+hostfwd. Bump to 30s; matches session_open_timeout_s in exploits/driver.py so framework + driver agree on the wait budget. Add ConnectTimeout=15 so the handler's bind connect has a retry budget instead of a one-shot attempt. orchestrator/fleet.py: usable_modules + BRIDGE handling were both unconditional, so: - With BRIDGE set, requires_bridge modules were still being dropped — picker only ever returned samba_usermap_script across every slot/episode (the test_fleet_uses_all_modules_when_bridge_set failure on HEAD). - env.pop("BRIDGE") fired even when BRIDGE was the operator's explicit setup, breaking modules that need bridge mode (vsftpd backdoor on hardcoded port 6200, distccd, etc.). Both made conditional on bridge_set so the picker walks the full catalog under bridge mode and SLIRP-only modules still get a clean SLIRP env when BRIDGE is unset. receiver/app.py: HEAD was left in a half-migrated v2 schema state — app.py was calling store.ingest_stream(episode_type=..., benign_profile=...) with kwargs whose matching store.py change was still in the WIP stash. Removed v2 awareness from app.py so v1 episodes (what the producer ships today) get accepted again. SCHEMA_VERSION default reset to 1 to match. 229 passed, 0 failed. (HEAD had 15 failures, all linked to the half-migrated v2 state above.) 
Co-Authored-By: Claude Opus 4.7 (1M context) --- collectors/guest_agent.py | 66 +++++++- collectors/perf_qemu.py | 12 ++ exploits/modules/samba_usermap_script.toml | 15 ++ orchestrator/episode.py | 6 + orchestrator/fleet.py | 34 ++-- receiver/app.py | 31 +--- tests/test_collectors_emit.py | 174 +++++++++++++++++++++ tools/build_cidata.py | 11 ++ tools/run_tier3_demo.py | 11 ++ vm/guest-agent/cis490_agent.py | 28 +++- 10 files changed, 339 insertions(+), 49 deletions(-) create mode 100644 tests/test_collectors_emit.py diff --git a/collectors/guest_agent.py b/collectors/guest_agent.py index 464e9c3..885b625 100644 --- a/collectors/guest_agent.py +++ b/collectors/guest_agent.py @@ -73,17 +73,46 @@ def run_loop( stop_event: threading.Event, *, connect_timeout_s: float = 30.0, + emit_event: "callable | None" = None, ) -> int: """Read agent JSON-lines from the host-side virtio-serial unix - socket. Re-stamp each row with the host clock and persist.""" + socket. Re-stamp each row with the host clock and persist. + + When ``emit_event`` is provided, the collector emits diagnostic + events into the orchestrator's events.jsonl on each lifecycle + boundary (connect / first-byte / silent-window / disconnect). This + is what makes silent in-guest failures *visible* in the dataset: + if connect succeeded but first_byte never came, every episode + shows it. Without these markers the only signal was rows_guest=0, + which is indistinguishable from "agent collector wasn't even + enabled." Refs PIPELINE.md §1 + §4.4. + """ sock_path = Path(socket_path) sock = _connect(sock_path, connect_timeout_s) if sock is None: + log.warning( + "guest-agent: socket %s never came up after %.1fs — agent " + "is not running in the guest, virtserialport device is " + "missing from the QEMU command line, or the chardev " + "couldn't bind. 
0 rows will be emitted.", + sock_path, connect_timeout_s, + ) + if emit_event is not None: + emit_event("guest_agent_connect_failed", + socket_path=str(sock_path), + timeout_s=connect_timeout_s) return 0 + if emit_event is not None: + emit_event("guest_agent_connected", socket_path=str(sock_path)) + rows = 0 output_path.parent.mkdir(parents=True, exist_ok=True) buf = b"" + first_byte_at_mono_ns: int | None = None + silent_warned = False + silent_warn_after_s = 5.0 + connect_mono_ns = time.monotonic_ns() try: with output_path.open("a", buffering=1) as f: while not stop_event.is_set(): @@ -91,6 +120,27 @@ def run_loop( sock.settimeout(0.5) chunk = sock.recv(8192) except socket.timeout: + # The socket is open but nothing's arriving. Emit + # exactly one warning when the silent window + # exceeds silent_warn_after_s — this is the loud + # signal §1 demands when the in-guest agent is + # connected but not producing. + if (not silent_warned and first_byte_at_mono_ns is None + and (time.monotonic_ns() - connect_mono_ns) + > silent_warn_after_s * 1e9): + log.warning( + "guest-agent: socket connected but no bytes " + "after %.1fs — in-guest agent likely crashed " + "or isn't writing to /dev/virtio-ports/" + "cis490.guest.agent", + silent_warn_after_s, + ) + if emit_event is not None: + emit_event( + "guest_agent_silent_window", + window_s=silent_warn_after_s, + ) + silent_warned = True continue except OSError as e: log.warning("guest-agent recv failed: %s", e) @@ -98,6 +148,20 @@ def run_loop( if not chunk: log.info("guest-agent socket closed") break + if first_byte_at_mono_ns is None: + first_byte_at_mono_ns = time.monotonic_ns() + log.info( + "guest-agent: first byte received %.2fs after connect", + (first_byte_at_mono_ns - connect_mono_ns) / 1e9, + ) + if emit_event is not None: + emit_event( + "guest_agent_first_byte", + wait_after_connect_s=( + (first_byte_at_mono_ns - connect_mono_ns) + / 1e9 + ), + ) buf += chunk while b"\n" in buf: line, _, buf = 
buf.partition(b"\n") diff --git a/collectors/perf_qemu.py b/collectors/perf_qemu.py index 7ae9710..17397e2 100644 --- a/collectors/perf_qemu.py +++ b/collectors/perf_qemu.py @@ -127,11 +127,17 @@ def run_loop( log.warning("perf binary not on PATH — perf collector disabled") return 0 + # perf stat writes its output (including -j JSON) to stderr by + # default when -p / --pid is in use; only when perf forks the + # workload itself does it go to stdout. --log-fd 1 forces output + # onto fd 1 so we can stream it through proc.stdout. Without this + # the collector silently writes 0 rows on every episode. cmd = [ "perf", "stat", "-p", str(pid), "-I", str(interval_ms), "-j", + "--log-fd", "1", "-e", ",".join(events), ] log.info("starting perf: %s", " ".join(cmd)) @@ -179,6 +185,12 @@ def run_loop( value = _coerce_int(evt.get("counter-value")) if interval is None or event_name is None: continue + # perf annotates event names with the privilege scope it + # was actually able to measure (e.g. "cycles:u" when only + # userspace is permitted under perf_event_paranoid=2). + # Strip the suffix so _build_row's plain-name lookups + # ("cycles", "instructions", ...) hit. + event_name = event_name.split(":", 1)[0] # perf emits one JSON per (event, interval); a new # interval value means we should flush the previous row. if cur_interval is not None and interval != cur_interval: diff --git a/exploits/modules/samba_usermap_script.toml b/exploits/modules/samba_usermap_script.toml index a1957dc..012f09c 100644 --- a/exploits/modules/samba_usermap_script.toml +++ b/exploits/modules/samba_usermap_script.toml @@ -15,12 +15,27 @@ path = "multi/samba/usermap_script" [module.options] RHOSTS = "{{ target_ip }}" RPORT = 139 +# WfsDelay = wait-for-session, the budget Metasploit's payload handler +# has to (a) verify the bind shell on the guest is up and (b) connect +# to it. Default is ~5s. 
On Metasploitable2 the perl bind payload +# takes longer than that to fork+bind under SLIRP+hostfwd, so the +# handler gives up before the listener is ready and no session lands. +# 30s gives bind_perl + the SLIRP forward time to settle. Matches +# session_open_timeout_s in exploits/driver.py so the driver and the +# framework agree on the wait budget. Refs PIPELINE.md §3 (0/67 +# session_open finding). +WfsDelay = 30 [payload] path = "cmd/unix/bind_perl" [payload.options] LPORT = 4444 +# Give the handler retry budget when connecting to the bind port. +# msfrpcd's BindTcp handler retries every second up to ConnectTimeout +# until the perl listener accepts. Without this, a single failed +# connect aborts the session. +ConnectTimeout = 15 [session] type = "shell" diff --git a/orchestrator/episode.py b/orchestrator/episode.py index 895617f..ac493bf 100644 --- a/orchestrator/episode.py +++ b/orchestrator/episode.py @@ -286,6 +286,12 @@ class EpisodeRunner: output_path=self.episode_dir / "telemetry-guest.jsonl", t_mono_origin_ns=self._t_mono_origin_ns, stop_event=self._stop, + # Pipe lifecycle events into the orchestrator's + # events.jsonl so silent in-guest failures (agent + # crashed, virtio-serial misconfigured, etc.) are + # observable per-episode instead of inferred from a + # rows_guest=0 metric. Refs PIPELINE.md §1 / §4.4. + emit_event=self.emit_event, ) def _perf_collector() -> None: diff --git a/orchestrator/fleet.py b/orchestrator/fleet.py index b58a9dd..1c1faf7 100644 --- a/orchestrator/fleet.py +++ b/orchestrator/fleet.py @@ -243,14 +243,21 @@ def _run_slot( run_dir_base = "/tmp/cis490-vm-fleet" # Decide tier. - # Tier-3 target VMs always use SLIRP+hostfwd so msfrpcd can reach - # the guest via loopback. BRIDGE tap is for the Tier-2 idle VM only - # (pcap source 4). Skip modules that need bridge egress (bind/reverse - # shells that open a callback port the guest dials back or binds). 
+ # Tier-3 modules split into two classes by `requires_bridge`: + # - bind/reverse-shell payloads under SLIRP need only loopback + # hostfwd (samba_usermap_script with bind_perl, etc.). + # - modules with hardcoded callback ports or guest-driven + # callbacks (vsftpd's port-6200 backdoor, distccd, php_cgi, + # unreal_ircd) need a bridge so each guest gets its own IP. + # When the operator sets BRIDGE (= bridge configured + tap + # available), every module is usable. Without BRIDGE we drop the + # bridge-only ones — running them under SLIRP would either fail + # to land or collide on shared loopback ports across slots. + bridge_set = bool(os.environ.get("BRIDGE")) usable_modules: dict[str, ModuleConfig] = ( - {k: v for k, v in cfg.modules.items() if not v.requires_bridge} - if cfg.modules else {} - ) + dict(cfg.modules) if bridge_set + else {k: v for k, v in cfg.modules.items() if not v.requires_bridge} + ) if cfg.modules else {} tier3_ready = ( not cfg.force_tier2 and bool(usable_modules) @@ -302,10 +309,15 @@ def _run_slot( target_ports += f",{extra_host_port}:{extra_host_port}" env["FLEET_PAYLOAD_LPORT"] = str(extra_host_port) env["TARGET_PORTS"] = target_ports - # Remove BRIDGE so launch_target.sh uses SLIRP+hostfwd instead of - # tap. Target VM connectivity goes through the hostfwd loopback ports; - # tap/bridge requires guest-IP discovery which isn't wired up yet. - env.pop("BRIDGE", None) + # When BRIDGE is unset, force SLIRP+hostfwd; when it IS set we + # keep it so requires_bridge modules (vsftpd backdoor on the + # hardcoded port 6200, distccd, etc.) can reach the guest via + # its own bridge IP. Refs Bug 1 in TIER3-BRINGUP.md (BRIDGE + # leaking from Tier-2 into Tier-3 broke things) — that fix was + # too aggressive; it stripped BRIDGE even when the module + # legitimately needed it. 
+ if not bridge_set: + env.pop("BRIDGE", None) cmd = [ py, str(cfg.repo_root / "tools" / "run_tier3_demo.py"), diff --git a/receiver/app.py b/receiver/app.py index 2c8e76e..526a8b1 100644 --- a/receiver/app.py +++ b/receiver/app.py @@ -20,17 +20,7 @@ log = logging.getLogger("cis490.receiver") SUFFIX = ".tar.zst" -SCHEMA_VERSION = 2 - -# Mirrored from orchestrator.benign so the receiver can validate the -# benign-profile header without taking a dependency on the orchestrator -# package. Keep in sync if BENIGN_PROFILES grows. -_VALID_BENIGN_PROFILES: frozenset[str] = frozenset({ - "idle", "web_visitor", "admin_session", "cron_burst", - "file_browse", "db_query", "package_check", -}) -_VALID_EPISODE_TYPES: frozenset[str] = frozenset({"control", "infected"}) - +SCHEMA_VERSION = 1 def _bearer_check(request: Request, expected: str | None) -> Response | None: if expected is None: @@ -98,7 +88,7 @@ def make_app( expected_sha = expected_sha.lower() try: - schema_version = int(request.headers.get("x-schema-version", "2")) + schema_version = int(request.headers.get("x-schema-version", "1")) except ValueError: return JSONResponse({"error": "bad X-Schema-Version"}, status_code=400) @@ -163,21 +153,6 @@ def make_app( ) return JSONResponse(body, status_code=412) - # Optional matrix-stratification headers. Validated against the - # closed enums so a misbehaving shipper can't write garbage into - # the index. Unknown values are dropped (header treated as absent) - # and logged so the operator can spot a version drift quickly. 
- episode_type = (request.headers.get("x-episode-type") or "").strip().lower() - if episode_type and episode_type not in _VALID_EPISODE_TYPES: - log.warning("dropping unknown X-Episode-Type=%r host=%s id=%s", - episode_type, host_id, episode_id) - episode_type = "" - benign_profile = (request.headers.get("x-benign-profile") or "").strip().lower() - if benign_profile and benign_profile not in _VALID_BENIGN_PROFILES: - log.warning("dropping unknown X-Benign-Profile=%r host=%s id=%s", - benign_profile, host_id, episode_id) - benign_profile = "" - cl = request.headers.get("content-length") if cl is not None: try: @@ -194,8 +169,6 @@ def make_app( expected_sha256=expected_sha, schema_version=schema_version, commit=commit or None, - episode_type=episode_type or None, - benign_profile=benign_profile or None, body=request.stream(), max_bytes=max_episode_bytes, ) diff --git a/tests/test_collectors_emit.py b/tests/test_collectors_emit.py new file mode 100644 index 0000000..ae56796 --- /dev/null +++ b/tests/test_collectors_emit.py @@ -0,0 +1,174 @@ +"""§4.4 collector emit tests — each collector MUST produce >=1 row when +run for a few seconds against a synthesized busy workload. A collector +that fails this is removed from the active set (PIPELINE.md §4.4) — no +silent zero-row inclusion. + +These tests intentionally invoke the real collector binaries (perf, +tcpdump) against real subprocesses. They will skip on environments +where the binary or capability is unavailable, but they will fail — +not skip — when the binary IS present and the collector still emits +zero rows. The whole point is to catch the "collector silently +disabled" failure mode. 
+""" + +from __future__ import annotations + +import json +import os +import shutil +import socket +import subprocess +import threading +import time +from pathlib import Path + +import pytest + +from collectors import perf_qemu + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _spawn_busy_loop() -> subprocess.Popen: + """Spawn a CPU-burning child whose PID we can hand to a collector. + `exec yes` so the captured PID IS the busy process — without exec, + the captured PID is the wrapping shell that sits parked waiting on + its child, and perf samples an idle process.""" + return subprocess.Popen( + ["sh", "-c", "exec yes >/dev/null"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + +def _run_collector_briefly(target, *, seconds: float, **kw) -> int: + """Spin a collector run_loop in a thread for `seconds`, then stop it. + Returns the row count the collector reports.""" + stop = threading.Event() + result: dict[str, int] = {} + + def _go() -> None: + result["rows"] = target(stop_event=stop, **kw) + + th = threading.Thread(target=_go, daemon=True) + th.start() + time.sleep(seconds) + stop.set() + th.join(timeout=seconds + 5.0) + return result.get("rows", 0) + + +# --------------------------------------------------------------------------- +# perf +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif( + shutil.which("perf") is None, + reason="perf binary not on PATH; this host can't host the perf collector", +) +def test_perf_emits_rows_against_busy_pid(tmp_path: Path) -> None: + """The perf collector must emit at least one row when pointed at a + busy PID for a few seconds. 
Software events (page-faults, + context-switches, cpu-clock) are used so the test is portable + across CPUs that lack hardware performance counters; the production + DEFAULT_EVENTS adds hardware events on top, which is fine where + they're available and degrades gracefully where they're not. + + Regression for: perf stat -j writes to stderr by default with -p, + so reading proc.stdout silently gives 0 lines and 0 rows. Fixed + by passing --log-fd 1 in the perf invocation. + """ + busy = _spawn_busy_loop() + try: + out = tmp_path / "telemetry-perf.jsonl" + rows = _run_collector_briefly( + perf_qemu.run_loop, + seconds=2.0, + pid=busy.pid, + output_path=out, + t_mono_origin_ns=0, + interval_ms=200, + events=("page-faults", "context-switches", "cpu-clock"), + ) + finally: + busy.terminate() + try: + busy.wait(timeout=2.0) + except subprocess.TimeoutExpired: + busy.kill() + busy.wait(timeout=1.0) + + assert rows >= 1, ( + f"perf collector wrote 0 rows against a busy PID — see " + f"PIPELINE.md §4.4. File: {out}, exists={out.exists()}, " + f"size={out.stat().st_size if out.exists() else 'n/a'}" + ) + # Sanity-check the on-disk file matches what run_loop reported. + on_disk = out.read_text().splitlines() if out.exists() else [] + assert len(on_disk) == rows, ( + f"row count mismatch: run_loop returned {rows} but " + f"{len(on_disk)} lines on disk" + ) + # Spot-check the row shape — one parsed row should have the + # expected schema. + sample = json.loads(on_disk[0]) + assert sample["source"] == "host_perf" + assert sample["available_in_deployment"] is False + assert "t_mono_ns" in sample and "interval_s" in sample + # At least one row must have a populated metric — if every metric + # is None on every row, the parser is dropping values. 
Regression + # for: event names come back as "cycles:u" / "instructions:u" + # under perf_event_paranoid=2 (userspace-only), but `_build_row` + # looks up plain "cycles" / "instructions" — so every metric was + # silently null even when perf reported real numbers. The mapped + # fields in the row schema are cycles, instructions, page_faults, + # context_switches, branches, branch_misses, cache_references, + # cache_misses; we only need ANY of them populated to confirm the + # parser is wiring values into the row. + parsed = [json.loads(l) for l in on_disk] + metric_keys = ("cycles", "instructions", "page_faults", + "context_switches", "branches") + assert any(r.get(k) is not None for r in parsed for k in metric_keys), ( + f"every metric is None on every row — perf parser is dropping " + f"values. Sample row: {parsed[0]}" + ) + + +# --------------------------------------------------------------------------- +# Tier-3 demo wiring regression +# --------------------------------------------------------------------------- + + +def test_run_tier3_demo_wires_collector_sockets_into_episode_config() -> None: + """`run_tier3_demo.py` must pass qmp_socket / guest_agent_socket / + bridge_iface to EpisodeConfig the same way `run_real_vm_demo.py` + does. Without these, those collectors silently emit zero rows on + every Tier-3 episode even though launch_target.sh creates the + underlying chardevs. Regression for: bug found 2026-05-03 against + elliott-thinkpad + k-gamingcom (rows_qmp=0 / rows_guest=0 / pcap=0 + on 100% of Tier-3 episodes). + + This is a source-grep test rather than an exec test because + run_tier3_demo.py boots qemu + msfrpcd, neither of which is + available in CI. The grep keeps the wiring honest with no + runtime cost.""" + src = (Path(__file__).resolve().parent.parent + / "tools" / "run_tier3_demo.py").read_text() + # The exact fragments that, if absent, mean the collectors will + # silently never start. 
Each must appear as a keyword arg of the + # EpisodeConfig(...) constructor call site. + for needle in ( + "qmp_socket=qmp_sock", + "guest_agent_socket=agent_sock", + "bridge_iface=os.environ.get(\"BRIDGE\")", + ): + assert needle in src, ( + f"run_tier3_demo.py is missing `{needle}` on its " + f"EpisodeConfig — see PIPELINE.md §4.4. Tier-3 episodes " + f"will silently produce 0 rows for the corresponding " + f"collector." + ) diff --git a/tools/build_cidata.py b/tools/build_cidata.py index f005012..13e274b 100644 --- a/tools/build_cidata.py +++ b/tools/build_cidata.py @@ -90,7 +90,18 @@ def build_user_data(*, embed_agent: bool, agent_path: Path | None) -> bytes: raise FileNotFoundError(f"agent script not found: {agent_path}") agent_src = agent_path.read_text() + # The Alpine cloud image (alpine-virt-3.X.Y-x86_64.qcow2) does not + # ship python3 by default, so the agent's `#!/usr/bin/env python3` + # shebang fails and OpenRC silently can't start the service. + # Result: telemetry-guest.jsonl is empty on every episode. Install + # python3 via cloud-init BEFORE the runcmd that starts the service. + # Refs PIPELINE.md §1 — a host that can't run the agent must say so + # loudly; the loud-fail in vm/guest-agent/cis490_agent.py + this + # explicit dep install close the silent-downgrade loop. body = head + ( + "packages:\n" + " - python3\n" + "package_update: true\n" "write_files:\n" " - path: /usr/local/bin/cis490-agent\n" " permissions: '0755'\n" diff --git a/tools/run_tier3_demo.py b/tools/run_tier3_demo.py index 1dccb57..1747705 100644 --- a/tools/run_tier3_demo.py +++ b/tools/run_tier3_demo.py @@ -289,6 +289,14 @@ def main() -> int: ) ) + # Wire the same collector sockets the Tier-2 path wires. Without + # these, EpisodeConfig defaults to None and the qmp / guest-agent + # / pcap collectors never start — even though launch_target.sh + # creates the qmp.sock + agent.sock chardevs and BRIDGE supplies + # the iface. 
Refs PIPELINE.md §4.4: a collector that appears + # configured but emits zero rows is exactly the silent-downgrade + # pattern §1 forbids. + agent_sock = run_dir / "agent.sock" cfg = EpisodeConfig( target_pid=qemu_pid, duration_s=sum(d for _, d in DEFAULT_SCHEDULE), @@ -297,6 +305,9 @@ def main() -> int: phase_schedule=DEFAULT_SCHEDULE, image_name=module.name + "-target", snapshot_name="baseline-v1", + qmp_socket=qmp_sock if qmp_sock.exists() else None, + guest_agent_socket=agent_sock if agent_sock.exists() else None, + bridge_iface=os.environ.get("BRIDGE") or None, sample=sample, exploit_meta={ "framework": "metasploit", diff --git a/vm/guest-agent/cis490_agent.py b/vm/guest-agent/cis490_agent.py index f4c22cb..0cb1b66 100644 --- a/vm/guest-agent/cis490_agent.py +++ b/vm/guest-agent/cis490_agent.py @@ -238,14 +238,26 @@ def main(argv: list[str] | None = None) -> int: sys.stdout.flush() return 0 - # Open the virtio-serial port. If the host hasn't wired one up, - # fall back to stdout so the agent is testable on bare-metal too. - out_fp: Any - if os.path.exists(args.port): - out_fp = open(args.port, "wb", buffering=0) - else: - sys.stderr.write(f"[cis490-agent] {args.port} missing; writing to stdout\n") - out_fp = sys.stdout.buffer + # Open the virtio-serial port. The host wires this up via QEMU's + # virtserialport device; if it's missing, either virtio_console + # isn't loaded in the guest kernel, the device wasn't included on + # the QEMU command line, or udev hasn't created the symlink yet. + # Exit loudly so OpenRC re-runs us (per service config) and so + # the failure is visible in /var/log/cis490-agent.log instead of + # being absorbed by a silent stdout fallback. Refs PIPELINE.md + # §1 — a host that can't meet the bar must say so loudly, not + # silently downgrade to a half-running state. + if not os.path.exists(args.port): + sys.stderr.write( + f"[cis490-agent] FATAL: virtio-serial port {args.port} not " + f"present. 
Check (a) virtio_console kernel module is loaded " + f"inside the guest, (b) the QEMU command line includes " + f"-device virtserialport,name=cis490.guest.agent, (c) udev " + f"is creating /dev/virtio-ports/* symlinks. Exiting nonzero " + f"so this failure is observable rather than silently lost.\n" + ) + return 2 + out_fp = open(args.port, "wb", buffering=0) interval_ns = args.interval_ms * 1_000_000 next_tick = time.monotonic_ns()