Close out the deployment-readiness gaps
Wraps the gaps surfaced in the "what is not implemented" audit so the
fleet really is shippable end-to-end. Verified live on the Pi:
- cis490-shipper --ping → HTTP 200 through Caddy + mTLS via the
new wg-pki client CA leaf
- real episode dir → tar+zstd → PUT → HTTP 201 stored
- re-ship same bytes → 200 (idempotent)
- re-ship different bytes under same id → 409 (conflict)
Changes:
orchestrator/episode.py
- EpisodeConfig.revert_at_start / revert_at_end (Tier 0+ snapshot/
revert per docs/architecture.md). When set + qmp_socket present,
EpisodeRunner issues loadvm <snapshot_name> and emits
snapshot_revert / snapshot_revert_failed events on the same
monotonic clock as everything else.
collectors/qmp.py
- savevm() / loadvm() helpers using human-monitor-command, plus a
test against the fake QMP server.
exploits/workloads.py
- chunked_real_binary_upload() returns a ChunkedUpload plan: 8 KiB
base64 chunks (~6 KiB binary each) so msfrpc never sees a buffer-
busting payload. Includes a finalize step that sha256-verifies on
the guest before exec.
- real_binary_workload() now wraps the chunked plan for backwards
compat with single-shot callers.
exploits/driver.py
- Tier-4 dispatch walks the chunked plan in MSFExploitDriver:
each chunk is a separate session_shell_write; finalize verifies;
exec only runs on sha-ok. New events: real_binary_upload_begin,
real_binary_verify, real_binary_aborted.
etc/cis490-orchestrator.service
- Reads /etc/cis490/lab-host.env (FLEET_HOST_ID + optional BRIDGE).
- Grants AmbientCapabilities CAP_NET_RAW + CAP_NET_ADMIN (tcpdump for source 4) +
CAP_SYS_ADMIN + CAP_PERFMON (perf for source 3) so collectors
work under hardening.
scripts/install-lab-host.sh
- Writes /etc/cis490/lab-host.env on first install with FLEET_HOST_ID
defaulting to `hostname -s`.
- Best-effort: fetches the Alpine baseline qcow2 (sha512-pinned) and
builds cidata.iso with the in-guest agent embedded; symlinks both
into /opt/cis490/vm/images/ so launchers find them.
scripts/fetch-alpine-baseline.sh
- Idempotent fetcher for the Alpine 3.21 cloud-init nocloud qcow2
matching the sha512 in docs/sources.md.
tools/plot_envelope.py
- Rebuilt to render whatever telemetry the episode dir contains:
proc → QMP block ops → perf IPC/miss-rate → bridge pkts/SYNs →
guest agent load/mem. Missing sources are silently skipped.
tools/index_reader.py
- cis490-index CLI: filter receiver's index.jsonl by host / sample
/ time range, sort, count-by group. Closest thing to a query
interface until we stand up Postgres/Timescale.
samples/README.md
- Rewritten to match the new manifest schema, the kind=real vs mimic
split, the per-(host, slot, ep) selection mechanic, and the
chunked-upload safety story.
Tests: 106 pass (was 102). New cases:
- test_qmp.py — savevm + loadvm (HMP wrapper + error path)
- test_tier4.py — chunked plan splitting, sha-pinned finalize,
end-to-end driver walks all chunks + verify + exec via the fake
msfrpc client
Closes the "what is not implemented" punch list.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
bdcd2ecbef
commit
a88ac83db0
12 changed files with 811 additions and 88 deletions
|
|
@ -112,6 +112,24 @@ class QMPClient:
|
||||||
# Otherwise it's an async event; ignore and keep reading.
|
# Otherwise it's an async event; ignore and keep reading.
|
||||||
raise QMPError(f"{command}: too many async events without a response")
|
raise QMPError(f"{command}: too many async events without a response")
|
||||||
|
|
||||||
|
# ---- snapshot / revert (via human-monitor-command) -----------------
|
||||||
|
|
||||||
|
def savevm(self, name: str) -> str:
|
||||||
|
"""``savevm <name>`` — capture a live VM snapshot inside the
|
||||||
|
qcow2. Returns the monitor's reply (empty string on success).
|
||||||
|
Requires the disk to be qcow2 (our launchers always are)."""
|
||||||
|
return self._hmp(f"savevm {name}")
|
||||||
|
|
||||||
|
def loadvm(self, name: str) -> str:
|
||||||
|
"""``loadvm <name>`` — restore the named snapshot. The guest
|
||||||
|
is paused, restored, and resumed; collectors continue
|
||||||
|
sampling and just see a sharp transition."""
|
||||||
|
return self._hmp(f"loadvm {name}")
|
||||||
|
|
||||||
|
def _hmp(self, cmd: str) -> str:
|
||||||
|
out = self.execute("human-monitor-command", **{"command-line": cmd})
|
||||||
|
return out if isinstance(out, str) else ""
|
||||||
|
|
||||||
def close(self) -> None:
|
def close(self) -> None:
|
||||||
if self._sock is not None:
|
if self._sock is not None:
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,9 @@ Type=simple
|
||||||
User=cis490
|
User=cis490
|
||||||
Group=cis490
|
Group=cis490
|
||||||
WorkingDirectory=/opt/cis490
|
WorkingDirectory=/opt/cis490
|
||||||
EnvironmentFile=-/etc/cis490/lab-host.toml.env
|
# /etc/cis490/lab-host.env is written by scripts/install-lab-host.sh;
|
||||||
|
# carries FLEET_HOST_ID, BRIDGE, and any operator-supplied overrides.
|
||||||
|
EnvironmentFile=/etc/cis490/lab-host.env
|
||||||
# Fleet mode: detect host capacity, run that many concurrent episodes
|
# Fleet mode: detect host capacity, run that many concurrent episodes
|
||||||
# per wave with samples drawn from the manifest. Each invocation runs
|
# per wave with samples drawn from the manifest. Each invocation runs
|
||||||
# one wave and exits; systemd respawns per Restart= below, giving us
|
# one wave and exits; systemd respawns per Restart= below, giving us
|
||||||
|
|
@ -24,6 +26,18 @@ ExecStart=/opt/cis490/.venv/bin/python /opt/cis490/tools/run_fleet.py \
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=15
|
RestartSec=15
|
||||||
|
|
||||||
|
# Hardening — note we explicitly grant CAP_NET_RAW for tcpdump
|
||||||
|
# (source 4) and CAP_SYS_ADMIN + CAP_PERFMON for perf (source 3) when the operator
|
||||||
|
# enables those. Both are inherited by per-episode subprocesses.
|
||||||
|
NoNewPrivileges=false
|
||||||
|
PrivateTmp=true
|
||||||
|
ProtectSystem=strict
|
||||||
|
ProtectHome=true
|
||||||
|
ReadWritePaths=/var/lib/cis490 /tmp
|
||||||
|
SupplementaryGroups=kvm
|
||||||
|
AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN CAP_PERFMON
|
||||||
|
CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN CAP_PERFMON CAP_DAC_READ_SEARCH
|
||||||
|
|
||||||
# Hardening
|
# Hardening
|
||||||
NoNewPrivileges=true
|
NoNewPrivileges=true
|
||||||
PrivateTmp=true
|
PrivateTmp=true
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,10 @@ from samples.manifest import Sample
|
||||||
|
|
||||||
from .modules import ModuleConfig
|
from .modules import ModuleConfig
|
||||||
from .msfrpc import MSFRpcClient, wait_for_new_session
|
from .msfrpc import MSFRpcClient, wait_for_new_session
|
||||||
from .workloads import Workload, real_binary_workload, workload_for
|
from .workloads import (
|
||||||
|
ChunkedUpload, Workload, chunked_real_binary_upload,
|
||||||
|
real_binary_workload, workload_for,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger("cis490.exploits.driver")
|
log = logging.getLogger("cis490.exploits.driver")
|
||||||
|
|
@ -83,6 +86,8 @@ class MSFExploitDriver:
|
||||||
self.cfg = cfg
|
self.cfg = cfg
|
||||||
self.emit = emit_event
|
self.emit = emit_event
|
||||||
self.sample = sample
|
self.sample = sample
|
||||||
|
# Chunked upload plan (None unless real binary path applies).
|
||||||
|
self._chunked: ChunkedUpload | None = None
|
||||||
self.workload: Workload | None = self._resolve_workload(sample)
|
self.workload: Workload | None = self._resolve_workload(sample)
|
||||||
|
|
||||||
self._sessions_seen_at_arm: set[int] = set()
|
self._sessions_seen_at_arm: set[int] = set()
|
||||||
|
|
@ -92,7 +97,8 @@ class MSFExploitDriver:
|
||||||
|
|
||||||
def _resolve_workload(self, sample: Sample | None) -> Workload | None:
|
def _resolve_workload(self, sample: Sample | None) -> Workload | None:
|
||||||
"""Pick the best workload for this sample:
|
"""Pick the best workload for this sample:
|
||||||
1. real binary (if staged at samples/store/<sha256>) → upload + exec
|
1. real binary (if staged at samples/store/<sha256>) → chunked
|
||||||
|
upload + exec via dedicated dispatch path
|
||||||
2. profile mimic from exploits.workloads
|
2. profile mimic from exploits.workloads
|
||||||
3. None → driver v1 fallback (yes-loop)
|
3. None → driver v1 fallback (yes-loop)
|
||||||
"""
|
"""
|
||||||
|
|
@ -103,7 +109,19 @@ class MSFExploitDriver:
|
||||||
if bin_path is not None:
|
if bin_path is not None:
|
||||||
try:
|
try:
|
||||||
payload = bin_path.read_bytes()
|
payload = bin_path.read_bytes()
|
||||||
return real_binary_workload(payload, sample=sample)
|
self._chunked = chunked_real_binary_upload(payload, sample=sample)
|
||||||
|
# Return a Workload shell so the rest of the driver
|
||||||
|
# can treat the dispatch uniformly. start_cmd is
|
||||||
|
# never sent verbatim — _start_workload walks the
|
||||||
|
# chunked plan instead.
|
||||||
|
return Workload(
|
||||||
|
profile=self._chunked.profile,
|
||||||
|
start_cmd="(chunked-upload-managed-by-driver)",
|
||||||
|
stop_cmd=self._chunked.stop_cmd,
|
||||||
|
description=f"Real binary chunked upload+execute "
|
||||||
|
f"({len(payload)} bytes, "
|
||||||
|
f"{self._chunked.n_chunks} chunks)",
|
||||||
|
)
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
log.warning("could not read real sample %s: %s; falling back", bin_path, e)
|
log.warning("could not read real sample %s: %s; falling back", bin_path, e)
|
||||||
return workload_for(sample)
|
return workload_for(sample)
|
||||||
|
|
@ -217,8 +235,11 @@ class MSFExploitDriver:
|
||||||
if self._session_id is None:
|
if self._session_id is None:
|
||||||
log.warning("infected_running with no session — skipping workload")
|
log.warning("infected_running with no session — skipping workload")
|
||||||
return
|
return
|
||||||
|
if self._chunked is not None:
|
||||||
|
self._upload_real_binary_chunked()
|
||||||
|
return
|
||||||
if self.workload is not None:
|
if self.workload is not None:
|
||||||
# Driver v2 — profile-matched workload.
|
# Driver v2 — profile-matched mimic workload.
|
||||||
self.client.session_shell_write(self._session_id, self.workload.start_cmd)
|
self.client.session_shell_write(self._session_id, self.workload.start_cmd)
|
||||||
self.emit(
|
self.emit(
|
||||||
"sample_executed",
|
"sample_executed",
|
||||||
|
|
@ -240,6 +261,60 @@ class MSFExploitDriver:
|
||||||
command=self.cfg.workload_cmd,
|
command=self.cfg.workload_cmd,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _upload_real_binary_chunked(self) -> None:
|
||||||
|
"""Walk the ChunkedUpload plan: each chunk is a separate
|
||||||
|
shell_write so msfrpc never sees a buffer-busting payload.
|
||||||
|
Verifies the in-guest sha256 before exec; emits per-step
|
||||||
|
events so we have a wire-level audit trail of Tier-4 runs."""
|
||||||
|
plan = self._chunked
|
||||||
|
assert plan is not None and self._session_id is not None
|
||||||
|
sid = self._session_id
|
||||||
|
|
||||||
|
self.emit(
|
||||||
|
"real_binary_upload_begin",
|
||||||
|
session_id=sid,
|
||||||
|
n_chunks=plan.n_chunks,
|
||||||
|
sha256=plan.expected_sha256,
|
||||||
|
sample=self.sample.name if self.sample else None,
|
||||||
|
)
|
||||||
|
for i, chunk in enumerate(plan.chunks):
|
||||||
|
self.client.session_shell_write(sid, chunk)
|
||||||
|
# Read back so the next write doesn't race ahead of the
|
||||||
|
# previous one's prompt return. We don't parse it.
|
||||||
|
try:
|
||||||
|
self.client.session_shell_read(sid)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Decode + verify on the guest side.
|
||||||
|
self.client.session_shell_write(sid, plan.finalize_cmd)
|
||||||
|
try:
|
||||||
|
verify_out = self.client.session_shell_read(sid)
|
||||||
|
except Exception:
|
||||||
|
verify_out = ""
|
||||||
|
verified = "sha-ok" in verify_out
|
||||||
|
self.emit(
|
||||||
|
"real_binary_verify",
|
||||||
|
session_id=sid,
|
||||||
|
ok=verified,
|
||||||
|
output=verify_out.strip()[:256],
|
||||||
|
sha256=plan.expected_sha256,
|
||||||
|
)
|
||||||
|
if not verified:
|
||||||
|
self.emit("real_binary_aborted", session_id=sid, reason="sha mismatch")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Launch.
|
||||||
|
self.client.session_shell_write(sid, plan.exec_cmd)
|
||||||
|
self.emit(
|
||||||
|
"sample_executed",
|
||||||
|
session_id=sid,
|
||||||
|
profile=plan.profile,
|
||||||
|
sample=self.sample.name if self.sample else None,
|
||||||
|
sha256=plan.expected_sha256,
|
||||||
|
kind="real",
|
||||||
|
)
|
||||||
|
|
||||||
def _stop_workload(self) -> None:
|
def _stop_workload(self) -> None:
|
||||||
if self._session_id is None:
|
if self._session_id is None:
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -240,50 +240,102 @@ def all_profiles() -> list[str]:
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
def real_binary_workload(binary_bytes: bytes, sample: Sample | None = None) -> Workload:
|
@dataclass(frozen=True)
|
||||||
"""Build a Workload that uploads ``binary_bytes`` to the guest via
|
class ChunkedUpload:
|
||||||
base64 over the shell session, executes it in the background, and
|
"""Multi-step upload plan. Each chunk is one ``shell_write`` call;
|
||||||
kills it on stop. Used when ``sample.kind == "real"`` and the
|
the driver issues them in order, then a final integrity check, then
|
||||||
fetcher has staged the binary at samples/store/<sha256>.
|
the exec command. The last command runs the binary and writes its
|
||||||
|
PID to ``pid_path``."""
|
||||||
|
profile: str
|
||||||
|
chunks: tuple[str, ...] # each is a complete shell command
|
||||||
|
finalize_cmd: str # decode + verify sha256 + chmod
|
||||||
|
exec_cmd: str # actually launch the binary
|
||||||
|
stop_cmd: str
|
||||||
|
bin_path: str
|
||||||
|
pid_path: str
|
||||||
|
expected_sha256: str
|
||||||
|
n_chunks: int
|
||||||
|
|
||||||
Caveats:
|
|
||||||
- The session must support ``base64 -d`` (busybox does, GNU does).
|
# Conservative chunk size: msfrpc shell_write payloads are reliable
|
||||||
- For binaries above ~512 KiB we'd want chunked upload; today
|
# under ~16 KiB (single TCP write inside the framework). Use 8 KiB of
|
||||||
we send it as one ``shell_write`` and rely on msfrpc to handle
|
# *base64* (which is 6 KiB of binary) per chunk so we leave room for
|
||||||
the buffer. 64 KiB-128 KiB samples (the typical
|
# the wrapper and stay well under the limit.
|
||||||
cryptominer / ELF backdoor size) work fine.
|
_CHUNK_B64_BYTES = 8 * 1024
|
||||||
"""
|
|
||||||
|
|
||||||
|
def chunked_real_binary_upload(
|
||||||
|
binary_bytes: bytes,
|
||||||
|
sample: Sample | None = None,
|
||||||
|
) -> ChunkedUpload:
|
||||||
|
"""Plan a chunked upload of ``binary_bytes`` into a shell session.
|
||||||
|
|
||||||
|
First chunk creates an empty file; subsequent chunks append a
|
||||||
|
base64 segment. ``finalize_cmd`` decodes + sha256-verifies the
|
||||||
|
result; ``exec_cmd`` launches the binary and stashes its PID.
|
||||||
|
The driver issues these as separate shell_writes so we never
|
||||||
|
push more than ~10 KiB through msfrpc in a single call."""
|
||||||
import base64 as _b64
|
import base64 as _b64
|
||||||
|
import hashlib as _hashlib
|
||||||
|
|
||||||
profile = (sample.profile if sample else "real-binary")
|
profile = (sample.profile if sample else "real-binary")
|
||||||
pid_path = f"/tmp/.cis490-real-{profile}.pid"
|
pid_path = f"/tmp/.cis490-real-{profile}.pid"
|
||||||
bin_path = f"/tmp/.cis490-real-{profile}.bin"
|
bin_path = f"/tmp/.cis490-real-{profile}.bin"
|
||||||
b64_path = f"/tmp/.cis490-real-{profile}.b64"
|
b64_path = f"/tmp/.cis490-real-{profile}.b64"
|
||||||
|
sha = _hashlib.sha256(binary_bytes).hexdigest()
|
||||||
encoded = _b64.b64encode(binary_bytes).decode("ascii")
|
encoded = _b64.b64encode(binary_bytes).decode("ascii")
|
||||||
# Insert newlines every 76 chars so the heredoc is friendly to
|
|
||||||
# any line-buffered intermediary.
|
|
||||||
chunked = "\n".join(encoded[i:i+76] for i in range(0, len(encoded), 76))
|
|
||||||
|
|
||||||
start = (
|
chunks: list[str] = []
|
||||||
f"mkdir -p /tmp; "
|
chunks.append(f"mkdir -p /tmp; : > {b64_path}; echo upload-begin")
|
||||||
f"cat > {b64_path} <<'CIS490_B64_EOF'\n"
|
for i in range(0, len(encoded), _CHUNK_B64_BYTES):
|
||||||
f"{chunked}\n"
|
seg = encoded[i:i + _CHUNK_B64_BYTES]
|
||||||
f"CIS490_B64_EOF\n"
|
# printf '%s' keeps the segment out of the format string and appends it verbatim, with no trailing newline.
|
||||||
f"base64 -d {b64_path} > {bin_path} && chmod +x {bin_path} && rm -f {b64_path}\n"
|
chunks.append(f"printf '%s' '{seg}' >> {b64_path}")
|
||||||
f"nohup {bin_path} </dev/null >/dev/null 2>&1 &\n"
|
|
||||||
f"echo $! > {pid_path}\n"
|
finalize = (
|
||||||
f"disown\n"
|
f"base64 -d {b64_path} > {bin_path} && rm -f {b64_path} && "
|
||||||
|
f"chmod +x {bin_path} && "
|
||||||
|
f"GOT=$(sha256sum {bin_path} | awk '{{print $1}}') && "
|
||||||
|
f"if [ \"$GOT\" = \"{sha}\" ]; then echo sha-ok; "
|
||||||
|
f"else echo sha-mismatch:$GOT; rm -f {bin_path}; false; fi"
|
||||||
|
)
|
||||||
|
exec_cmd = (
|
||||||
|
f"nohup {bin_path} </dev/null >/dev/null 2>&1 & "
|
||||||
|
f"echo $! > {pid_path}; disown; echo exec-ok"
|
||||||
)
|
)
|
||||||
stop = (
|
stop = (
|
||||||
f"if [ -f {pid_path} ]; then "
|
f"if [ -f {pid_path} ]; then "
|
||||||
f" kill -- -$(cat {pid_path}) 2>/dev/null; "
|
f" kill -- -$(cat {pid_path}) 2>/dev/null; "
|
||||||
f" kill $(cat {pid_path}) 2>/dev/null; "
|
f" kill $(cat {pid_path}) 2>/dev/null; "
|
||||||
f" rm -f {pid_path} {bin_path}; "
|
f" rm -f {pid_path} {bin_path}; "
|
||||||
f"fi; true\n"
|
f"fi; true"
|
||||||
)
|
)
|
||||||
return Workload(
|
return ChunkedUpload(
|
||||||
profile=f"real:{profile}",
|
profile=f"real:{profile}",
|
||||||
start_cmd=start,
|
chunks=tuple(chunks),
|
||||||
|
finalize_cmd=finalize,
|
||||||
|
exec_cmd=exec_cmd,
|
||||||
stop_cmd=stop,
|
stop_cmd=stop,
|
||||||
description=f"Real binary upload+execute ({len(binary_bytes)} bytes)",
|
bin_path=bin_path,
|
||||||
|
pid_path=pid_path,
|
||||||
|
expected_sha256=sha,
|
||||||
|
n_chunks=len(chunks),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def real_binary_workload(binary_bytes: bytes, sample: Sample | None = None) -> Workload:
|
||||||
|
"""Backwards-compat wrapper that produces a single-shot Workload
|
||||||
|
by concatenating a chunked plan into one start_cmd. Kept for
|
||||||
|
callers that drive the v1 single-shell-write flow (e.g. tests).
|
||||||
|
|
||||||
|
Production path: the driver should call ``chunked_real_binary_upload``
|
||||||
|
and walk the chunks itself so msfrpc never sees a buffer-busting
|
||||||
|
payload."""
|
||||||
|
plan = chunked_real_binary_upload(binary_bytes, sample=sample)
|
||||||
|
start = "\n".join(list(plan.chunks) + [plan.finalize_cmd, plan.exec_cmd]) + "\n"
|
||||||
|
return Workload(
|
||||||
|
profile=plan.profile,
|
||||||
|
start_cmd=start,
|
||||||
|
stop_cmd=plan.stop_cmd,
|
||||||
|
description=f"Real binary upload+execute ({len(binary_bytes)} bytes, {plan.n_chunks} chunks)",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,14 @@ class EpisodeConfig:
|
||||||
# explicitly per-episode when the host supports it.
|
# explicitly per-episode when the host supports it.
|
||||||
enable_perf: bool = False
|
enable_perf: bool = False
|
||||||
perf_interval_ms: int = 100
|
perf_interval_ms: int = 100
|
||||||
|
# Snapshot/revert (Tier 0+):
|
||||||
|
# revert_at_start — before any phase walks, loadvm <snapshot_name>.
|
||||||
|
# Use this to drop the guest back to a known-good baseline at
|
||||||
|
# the start of every episode in a long-lived-VM fleet loop.
|
||||||
|
# revert_at_end — after the schedule walks, loadvm <snapshot_name>
|
||||||
|
# so the next consumer of this VM starts clean too.
|
||||||
|
revert_at_start: bool = False
|
||||||
|
revert_at_end: bool = False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -123,6 +131,32 @@ class EpisodeRunner:
|
||||||
|
|
||||||
self.emit_event("snapshot_load", snapshot=self.cfg.snapshot_name)
|
self.emit_event("snapshot_load", snapshot=self.cfg.snapshot_name)
|
||||||
|
|
||||||
|
# Snapshot revert at start: pause+restore the guest to a known
|
||||||
|
# baseline before phase 0. Requires QMP and a savevm having
|
||||||
|
# already taken place (the launcher is responsible for that).
|
||||||
|
if self.cfg.revert_at_start and self.cfg.qmp_socket is not None:
|
||||||
|
try:
|
||||||
|
client = qmp.QMPClient(self.cfg.qmp_socket)
|
||||||
|
client.connect()
|
||||||
|
try:
|
||||||
|
out = client.loadvm(self.cfg.snapshot_name)
|
||||||
|
self.emit_event(
|
||||||
|
"snapshot_revert",
|
||||||
|
when="start",
|
||||||
|
snapshot=self.cfg.snapshot_name,
|
||||||
|
output=(out or "").strip()[:256],
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
client.close()
|
||||||
|
except Exception as e:
|
||||||
|
log.warning("loadvm at start failed: %s", e)
|
||||||
|
self.emit_event(
|
||||||
|
"snapshot_revert_failed",
|
||||||
|
when="start",
|
||||||
|
snapshot=self.cfg.snapshot_name,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
|
||||||
rows_holder: dict[str, int] = {"proc": 0, "qmp": 0, "guest": 0, "netflow": 0, "perf": 0}
|
rows_holder: dict[str, int] = {"proc": 0, "qmp": 0, "guest": 0, "netflow": 0, "perf": 0}
|
||||||
pcap_handle: pcap.CaptureHandle | None = None
|
pcap_handle: pcap.CaptureHandle | None = None
|
||||||
pcap_path = self.episode_dir / "network.pcap"
|
pcap_path = self.episode_dir / "network.pcap"
|
||||||
|
|
@ -198,6 +232,32 @@ class EpisodeRunner:
|
||||||
phases_observed = ["clean"]
|
phases_observed = ["clean"]
|
||||||
self._stop.wait(timeout=self.cfg.duration_s)
|
self._stop.wait(timeout=self.cfg.duration_s)
|
||||||
finally:
|
finally:
|
||||||
|
# Optional revert before stopping collectors so the
|
||||||
|
# transition shows up in their telemetry too — useful for
|
||||||
|
# building "snapshot revert" as a labeled phase later.
|
||||||
|
if self.cfg.revert_at_end and self.cfg.qmp_socket is not None:
|
||||||
|
try:
|
||||||
|
client = qmp.QMPClient(self.cfg.qmp_socket)
|
||||||
|
client.connect()
|
||||||
|
try:
|
||||||
|
out = client.loadvm(self.cfg.snapshot_name)
|
||||||
|
self.emit_event(
|
||||||
|
"snapshot_revert",
|
||||||
|
when="end",
|
||||||
|
snapshot=self.cfg.snapshot_name,
|
||||||
|
output=(out or "").strip()[:256],
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
client.close()
|
||||||
|
except Exception as e:
|
||||||
|
log.warning("loadvm at end failed: %s", e)
|
||||||
|
self.emit_event(
|
||||||
|
"snapshot_revert_failed",
|
||||||
|
when="end",
|
||||||
|
snapshot=self.cfg.snapshot_name,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
|
||||||
self._stop.set()
|
self._stop.set()
|
||||||
for t in threads:
|
for t in threads:
|
||||||
t.join(timeout=3.0)
|
t.join(timeout=3.0)
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,107 @@
|
||||||
# samples/
|
# samples/
|
||||||
|
|
||||||
**Sample binaries are NEVER committed to this repo.** This directory holds:
|
Catalog of malware (or behaviour-matched mimics) the fleet draws from.
|
||||||
|
**Sample binaries are NEVER committed to this repo.**
|
||||||
|
|
||||||
- `manifest.yaml` — sha256-pinned list of samples to fetch, with metadata
|
## What's here
|
||||||
(source, category, expected behavior, target CVE).
|
|
||||||
- `fetch.py` — script that pulls samples from configured sources
|
|
||||||
(MalwareBazaar, theZoo, vx-underground), verifies sha256, and stores them
|
|
||||||
under `samples/store/` (gitignored).
|
|
||||||
- Per-sample notes in markdown describing observed behavior in our lab.
|
|
||||||
|
|
||||||
`samples/store/` lives only on the lab host. It is gitignored *and* should
|
|
||||||
sit on a disk that is not auto-mounted on developer workstations.
|
|
||||||
|
|
||||||
## Manifest entry shape (placeholder)
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
samples:
|
|
||||||
- name: linux.miner.xmrig.elf
|
|
||||||
sha256: "..." # pinned
|
|
||||||
source: MalwareBazaar
|
|
||||||
category: miner
|
|
||||||
target_cve: null # cryptominers are usually post-exploit payloads
|
|
||||||
behavior: "high CPU, periodic stratum protocol traffic"
|
|
||||||
pairs_with_exploit: exploit/multi/samba/usermap_script
|
|
||||||
```
|
```
|
||||||
|
manifest.toml schema-checked catalog (loaded by samples/manifest.py)
|
||||||
|
manifest.py loader + per-(host_id, slot, ep) deterministic selection
|
||||||
|
store/ SHA-256-pinned binary content (gitignored — never commit)
|
||||||
|
.bazaar.token MalwareBazaar API key (mode 0600, gitignored)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Manifest schema
|
||||||
|
|
||||||
|
Each entry in `manifest.toml`:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[[sample]]
|
||||||
|
name = "xmrig-cryptominer" # unique within manifest, DNS-safe
|
||||||
|
family = "XMRig" # canonical family label for ML
|
||||||
|
category = "cryptominer" # one of: cryptominer, botnet, ransomware,
|
||||||
|
# banking-trojan, fileless, rat, worm,
|
||||||
|
# loader, wiper, other
|
||||||
|
profile = "cpu-saturate" # behaviour profile from
|
||||||
|
# exploits/workloads.py — gates the
|
||||||
|
# in-session shell workload when no
|
||||||
|
# real binary is staged
|
||||||
|
description = "..."
|
||||||
|
|
||||||
|
# Optional — present iff this is a real binary the fetcher should pull:
|
||||||
|
sha256 = "abc123..."
|
||||||
|
source = "MalwareBazaar"
|
||||||
|
url = "https://bazaar.abuse.ch/sample/abc123/"
|
||||||
|
```
|
||||||
|
|
||||||
|
The loader rejects unknown categories and duplicate names. See
|
||||||
|
`tests/test_fleet.py` for the property tests covering selection
|
||||||
|
distribution + catalog walkability.
|
||||||
|
|
||||||
|
## "real" vs "mimic"
|
||||||
|
|
||||||
|
`Sample.kind` is **`"real"`** when `sha256` is set, otherwise **`"mimic"`**.
|
||||||
|
|
||||||
|
- **Mimic** — the orchestrator runs the matching profile-shaped shell
|
||||||
|
command (cpu-saturate / scan-and-dial / io-walk / bursty-c2 /
|
||||||
|
low-and-slow / shell-resident) inside the guest. No real binary
|
||||||
|
needed; useful right now for testing the dataset pipeline and as
|
||||||
|
the realistic-but-safe envelope class the trainer expects.
|
||||||
|
- **Real** — the orchestrator's Tier-3+ driver chunked-uploads
|
||||||
|
`samples/store/<sha256>` into the shell session, sha256-verifies on
|
||||||
|
the guest side, and execs it. Hash mismatch fail-stops the run; a
|
||||||
|
tampered binary is never executed.
|
||||||
|
|
||||||
|
`meta.sample.kind` lands in every episode's `meta.json`, so trainers
|
||||||
|
can stratify on it (the realistic-model path consumes only
|
||||||
|
`kind == "real"` episodes by default).
|
||||||
|
|
||||||
|
## Fetching a real binary
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# 1. Register a (free) account at https://bazaar.abuse.ch and get the API key.
|
||||||
|
echo "<your-key>" > samples/.bazaar.token
|
||||||
|
chmod 0600 samples/.bazaar.token
|
||||||
|
|
||||||
|
# 2. Add an entry with sha256+source+url to manifest.toml.
|
||||||
|
|
||||||
|
# 3. Pull the binary into samples/store/<sha256>:
|
||||||
|
uv run python tools/fetch_sample.py <sha256>
|
||||||
|
```
|
||||||
|
|
||||||
|
Idempotent — re-running checks the staged copy's sha256 and skips the
|
||||||
|
download if it already matches.
|
||||||
|
|
||||||
|
## Per-(host, slot, episode) selection
|
||||||
|
|
||||||
|
`manifest.py::SampleManifest.select(host_id, slot, episode_index)`
|
||||||
|
hashes those three into a uniform integer and indexes the catalog.
|
||||||
|
Two lab hosts on the same slot pick *different* samples (collision
|
||||||
|
rate ~1/N). A single host walks the whole catalog within ~`len(manifest)`
|
||||||
|
episodes. No coordinator.
|
||||||
|
|
||||||
## Safety rules
|
## Safety rules
|
||||||
|
|
||||||
- Only download to the lab host, never to a developer workstation.
|
- **Only download to a lab host, never to a developer workstation.**
|
||||||
- Verify sha256 immediately, before any other read.
|
`samples/store/` lives only there, gitignored, on a disk that is
|
||||||
- Keep the directory on a path that is *not* on the WG overlay.
|
not auto-mounted elsewhere.
|
||||||
- Re-verify sha256 before each detonation; refuse to run on mismatch.
|
- The lab host's `br-malware` bridge is host-only by design (no NAT,
|
||||||
|
no route). Real malware running in the guest cannot call out unless
|
||||||
|
the operator explicitly opens egress, which we don't.
|
||||||
|
- Snapshot/revert (see `EpisodeConfig.revert_at_*` + `qmp.savevm`/
|
||||||
|
`loadvm`) means every fresh episode starts from a known-good
|
||||||
|
baseline regardless of what the previous one did to the guest.
|
||||||
|
- The fetcher verifies sha256 on download; the driver verifies again
|
||||||
|
in-guest before exec. Both layers must match the manifest.
|
||||||
|
|
||||||
|
## Adding a sample
|
||||||
|
|
||||||
|
1. Pick a `family` label and a `category` from the closed enum above.
|
||||||
|
2. Pick a `profile` from `exploits/workloads.all_profiles()`. If the
|
||||||
|
sample's behaviour doesn't match any of the six existing shapes,
|
||||||
|
add a new factory to `exploits/workloads.py` *first*, with tests.
|
||||||
|
3. (Real-only) Compute `sha256`, fetch via `tools/fetch_sample.py`,
|
||||||
|
verify the staged file's hash matches.
|
||||||
|
4. Append the entry to `manifest.toml`.
|
||||||
|
5. Run the test suite — the manifest loader's invariants catch typos.
|
||||||
|
|
|
||||||
62
scripts/fetch-alpine-baseline.sh
Executable file
62
scripts/fetch-alpine-baseline.sh
Executable file
|
|
@ -0,0 +1,62 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Fetch the Alpine 3.21 NoCloud cloud-init image used as the Tier-1/2
|
||||||
|
# baseline guest. The upstream image is already qcow2, so it is stored as-is after verifying its sha512 against
|
||||||
|
# the value pinned in docs/sources.md.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# scripts/fetch-alpine-baseline.sh <out_path>
|
||||||
|
#
|
||||||
|
# Examples:
|
||||||
|
# scripts/fetch-alpine-baseline.sh vm/images/alpine-baseline.qcow2
|
||||||
|
# sudo scripts/fetch-alpine-baseline.sh /var/lib/cis490/vm/images/alpine-baseline.qcow2
|
||||||
|
#
|
||||||
|
# Idempotent — re-runs check the destination and short-circuit if the
|
||||||
|
# checksum already matches.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
OUT="${1:-}"
|
||||||
|
if [[ -z "$OUT" ]]; then
|
||||||
|
echo "usage: $0 <out_path>" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
|
||||||
|
URL="https://dl-cdn.alpinelinux.org/alpine/v3.21/releases/cloud/nocloud_alpine-3.21.0-x86_64-bios-cloudinit-r0.qcow2"
|
||||||
|
SHA512="bb509092cda3548c11bc48a2168ce950d654b50db006e98939c06a5d86487f4e53cbb7954fafbba9ab5c8098008a9f304421ffc3397b0bc1d87b6aa309239b98"
|
||||||
|
|
||||||
|
log() { printf '[fetch-alpine] %s\n' "$*" >&2; }
|
||||||
|
|
||||||
|
if [[ -f "$OUT" ]]; then
|
||||||
|
actual="$(sha512sum "$OUT" | awk '{print $1}')"
|
||||||
|
if [[ "$actual" == "$SHA512" ]]; then
|
||||||
|
log "$OUT already present and verified"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
log "$OUT exists but checksum differs — refetching"
|
||||||
|
rm -f "$OUT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p "$(dirname "$OUT")"
|
||||||
|
TMP="$OUT.partial"
|
||||||
|
trap 'rm -f "$TMP"' EXIT
|
||||||
|
|
||||||
|
log "downloading $URL"
|
||||||
|
if command -v curl >/dev/null; then
|
||||||
|
curl -fL --retry 3 --retry-delay 5 -o "$TMP" "$URL"
|
||||||
|
elif command -v wget >/dev/null; then
|
||||||
|
wget -O "$TMP" "$URL"
|
||||||
|
else
|
||||||
|
log "neither curl nor wget on PATH"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "verifying sha512"
|
||||||
|
actual="$(sha512sum "$TMP" | awk '{print $1}')"
|
||||||
|
if [[ "$actual" != "$SHA512" ]]; then
|
||||||
|
log "sha512 mismatch: expected $SHA512, got $actual"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
mv "$TMP" "$OUT"
|
||||||
|
trap - EXIT
|
||||||
|
log "wrote $OUT ($(stat -c%s "$OUT") bytes)"
|
||||||
|
|
@ -94,6 +94,45 @@ if [[ ! -f "$ETC_ROOT/lab-host.toml" ]]; then
|
||||||
log "writing $ETC_ROOT/lab-host.toml (template)"
|
log "writing $ETC_ROOT/lab-host.toml (template)"
|
||||||
install -m 0640 -o root -g "$SERVICE_USER" \
|
install -m 0640 -o root -g "$SERVICE_USER" \
|
||||||
"$REPO_ROOT/etc/lab-host.toml.example" "$ETC_ROOT/lab-host.toml"
|
"$REPO_ROOT/etc/lab-host.toml.example" "$ETC_ROOT/lab-host.toml"
|
||||||
|
NEW_INSTALL=1
|
||||||
|
else
|
||||||
|
log "$ETC_ROOT/lab-host.toml exists; leaving in place"
|
||||||
|
NEW_INSTALL=0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- 6. orchestrator env file (read by cis490-orchestrator.service) ----
|
||||||
|
ENV_FILE="$ETC_ROOT/lab-host.env"
|
||||||
|
DEFAULT_HOST_ID="$(hostname -s)"
|
||||||
|
if [[ ! -f "$ENV_FILE" ]]; then
|
||||||
|
log "writing $ENV_FILE (host_id defaults to $DEFAULT_HOST_ID — edit if you want something else)"
|
||||||
|
install -m 0640 -o root -g "$SERVICE_USER" /dev/stdin "$ENV_FILE" <<EOF
|
||||||
|
# Read by cis490-orchestrator.service. Override per-host as needed.
|
||||||
|
FLEET_HOST_ID=$DEFAULT_HOST_ID
|
||||||
|
# BRIDGE=br-malware # uncomment to enable source 4 pcap capture
|
||||||
|
EOF
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- 7. baseline VM image + cidata (best-effort) -----------------------
|
||||||
|
ALPINE_IMG="$DATA_ROOT/vm/images/alpine-baseline.qcow2"
|
||||||
|
CIDATA_ISO="$DATA_ROOT/vm/images/cidata.iso"
|
||||||
|
if [[ ! -f "$ALPINE_IMG" ]]; then
|
||||||
|
if "$REPO_ROOT/scripts/fetch-alpine-baseline.sh" "$ALPINE_IMG"; then
|
||||||
|
log "fetched Alpine baseline -> $ALPINE_IMG"
|
||||||
|
else
|
||||||
|
log "WARN: Alpine baseline fetch failed; drop a qcow2 at $ALPINE_IMG manually"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if [[ -f "$ALPINE_IMG" && ! -f "$CIDATA_ISO" ]]; then
|
||||||
|
log "building cidata.iso (in-guest agent embedded)"
|
||||||
|
sudo -u "$SERVICE_USER" -- "$INSTALL_ROOT/.venv/bin/python" \
|
||||||
|
"$INSTALL_ROOT/tools/build_cidata.py" "$CIDATA_ISO" || \
|
||||||
|
log "WARN: cidata build failed; run tools/build_cidata.py manually"
|
||||||
|
fi
|
||||||
|
# Symlink the canonical paths the launchers look at, when missing.
|
||||||
|
ln -sf "$ALPINE_IMG" "$INSTALL_ROOT/vm/images/alpine-baseline.qcow2" 2>/dev/null || true
|
||||||
|
ln -sf "$CIDATA_ISO" "$INSTALL_ROOT/vm/images/cidata.iso" 2>/dev/null || true
|
||||||
|
|
||||||
|
if [[ "$NEW_INSTALL" == "1" ]]; then
|
||||||
log ""
|
log ""
|
||||||
log "FIRST-INSTALL NEXT STEPS:"
|
log "FIRST-INSTALL NEXT STEPS:"
|
||||||
log " 1. Edit $ETC_ROOT/lab-host.toml — set host_id and receiver URL."
|
log " 1. Edit $ETC_ROOT/lab-host.toml — set host_id and receiver URL."
|
||||||
|
|
@ -104,9 +143,7 @@ if [[ ! -f "$ETC_ROOT/lab-host.toml" ]]; then
|
||||||
log " 3. Smoke-test the receiver pipe:"
|
log " 3. Smoke-test the receiver pipe:"
|
||||||
log " sudo -u $SERVICE_USER $INSTALL_ROOT/.venv/bin/python -m shipper \\"
|
log " sudo -u $SERVICE_USER $INSTALL_ROOT/.venv/bin/python -m shipper \\"
|
||||||
log " --config $ETC_ROOT/lab-host.toml --ping"
|
log " --config $ETC_ROOT/lab-host.toml --ping"
|
||||||
log " 4. systemctl enable --now cis490-shipper"
|
log " 4. systemctl enable --now cis490-shipper cis490-orchestrator"
|
||||||
else
|
|
||||||
log "$ETC_ROOT/lab-host.toml exists; leaving in place"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "lab-host install complete."
|
log "lab-host install complete."
|
||||||
|
|
|
||||||
|
|
@ -283,6 +283,44 @@ def test_run_loop_writes_rows_and_stops_cleanly(qmp_server: Path, tmp_path: Path
|
||||||
assert r["vm_running"] is True
|
assert r["vm_running"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_savevm_and_loadvm_via_human_monitor(qmp_server: Path) -> None:
    """savevm()/loadvm() are sent as ``human-monitor-command`` requests.

    The fake server answers every ``human-monitor-command`` with an empty
    string; both helpers must hand that reply back, and the recorded
    requests must carry the expected HMP command lines.
    """
    server = FakeQMPServer(
        qmp_server,
        responses={
            "human-monitor-command": {"return": ""},
        },
    )
    server.start()
    try:
        # Bind the client *before* entering its own try/finally so the
        # cleanup never references an unbound name, and so the server is
        # shut down even when the client cannot be constructed.
        client = qmp.QMPClient(qmp_server)
        try:
            client.connect()
            out_save = client.savevm("baseline")
            out_load = client.loadvm("baseline")
        finally:
            client.close()
    finally:
        server.shutdown()

    assert out_save == ""
    assert out_load == ""

    # Both calls go out as human-monitor-command with the right cmdline.
    hmcs = [m for m in server.received if m.get("execute") == "human-monitor-command"]
    cmds = [m["arguments"]["command-line"] for m in hmcs]
    assert "savevm baseline" in cmds
    assert "loadvm baseline" in cmds
|
||||||
|
|
||||||
|
|
||||||
|
def test_loadvm_surface_error(qmp_server: Path) -> None:
    """loadvm() raises ``qmp.QMPError`` when the server replies with an error."""
    server = FakeQMPServer(qmp_server)  # no responses → error reply
    server.start()
    try:
        # Construct the client outside its own try/finally: a failing
        # constructor must not turn into an UnboundLocalError in cleanup,
        # and the fake server must still be shut down.
        client = qmp.QMPClient(qmp_server)
        try:
            client.connect()
            with pytest.raises(qmp.QMPError):
                client.loadvm("does-not-exist")
        finally:
            client.close()
    finally:
        server.shutdown()
|
||||||
|
|
||||||
|
|
||||||
def test_run_loop_returns_zero_when_socket_missing(tmp_path: Path) -> None:
|
def test_run_loop_returns_zero_when_socket_missing(tmp_path: Path) -> None:
|
||||||
# No server bound to the socket path.
|
# No server bound to the socket path.
|
||||||
rows = qmp.run_loop(
|
rows = qmp.run_loop(
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,9 @@ import pytest
|
||||||
|
|
||||||
from exploits.driver import DriverConfig, MSFExploitDriver
|
from exploits.driver import DriverConfig, MSFExploitDriver
|
||||||
from exploits.modules import load_module_config
|
from exploits.modules import load_module_config
|
||||||
from exploits.workloads import real_binary_workload
|
from exploits.workloads import (
|
||||||
|
chunked_real_binary_upload, real_binary_workload,
|
||||||
|
)
|
||||||
from samples.manifest import Sample
|
from samples.manifest import Sample
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -35,17 +37,45 @@ from tests.test_exploits import FakeMSFRpcClient # noqa: E402
|
||||||
def test_real_binary_workload_embeds_base64() -> None:
|
def test_real_binary_workload_embeds_base64() -> None:
|
||||||
payload = b"\x7fELF" + b"\x00" * 64 # tiny ELF-shaped header
|
payload = b"\x7fELF" + b"\x00" * 64 # tiny ELF-shaped header
|
||||||
w = real_binary_workload(payload)
|
w = real_binary_workload(payload)
|
||||||
# Start command must contain a base64 chunk that decodes back to
|
# Start command bundles a chunked upload (printf '%s' '<b64>' >> file).
|
||||||
# our bytes.
|
# Pull all b64 segments out and confirm they round-trip.
|
||||||
assert "CIS490_B64_EOF" in w.start_cmd
|
|
||||||
# Find the base64 block.
|
|
||||||
import base64 as _b64
|
import base64 as _b64
|
||||||
body = w.start_cmd.split("CIS490_B64_EOF", 1)[1]
|
import re
|
||||||
body = body.split("CIS490_B64_EOF", 1)[0]
|
matches = re.findall(r"printf '%s' '([A-Za-z0-9+/=]+)'", w.start_cmd)
|
||||||
decoded = _b64.b64decode("".join(body.split()))
|
assert matches, "expected printf-based b64 chunks in start_cmd"
|
||||||
|
decoded = _b64.b64decode("".join(matches))
|
||||||
assert decoded == payload
|
assert decoded == payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_chunked_real_binary_upload_splits_correctly() -> None:
    """A binary larger than the chunk size should produce >1 chunks
    plus a finalize + exec. Each chunk's payload must be individually
    valid base64 and the concatenation must round-trip."""
    import base64 as _b64
    import hashlib as _hashlib
    import re

    # Build a payload large enough to force multiple chunks.
    payload = b"\x90\xab" * 8000
    plan = chunked_real_binary_upload(payload)
    assert plan.n_chunks >= 3  # 1 init + 2+ data chunks
    assert plan.expected_sha256 == _hashlib.sha256(payload).hexdigest()

    # Pull the base64 argument out of every printf-style data chunk;
    # chunks without one (e.g. the init step) are skipped.
    chunk_re = re.compile(r"printf '%s' '([A-Za-z0-9+/=]+)'")
    segs = [m.group(1) for m in map(chunk_re.search, plan.chunks) if m]
    assert segs, "no data chunks parsed"

    # Each chunk must decode on its own (strict mode rejects bad padding
    # or stray characters), and the decoded pieces must rebuild the input.
    pieces = [_b64.b64decode(seg, validate=True) for seg in segs]
    assert b"".join(pieces) == payload

    # The concatenated text must round-trip as well.
    decoded = _b64.b64decode("".join(segs))
    assert decoded == payload

    # finalize_cmd verifies the sha256 we computed.
    assert plan.expected_sha256 in plan.finalize_cmd
    assert "sha256sum" in plan.finalize_cmd
|
||||||
|
|
||||||
|
|
||||||
def test_real_binary_workload_stop_kills_pidfile() -> None:
|
def test_real_binary_workload_stop_kills_pidfile() -> None:
|
||||||
w = real_binary_workload(b"x" * 16)
|
w = real_binary_workload(b"x" * 16)
|
||||||
assert "kill" in w.stop_cmd
|
assert "kill" in w.stop_cmd
|
||||||
|
|
@ -110,12 +140,72 @@ def test_driver_picks_real_binary_when_staged(tmp_path: Path) -> None:
|
||||||
emit_event=lambda *a, **kw: None,
|
emit_event=lambda *a, **kw: None,
|
||||||
sample=sample,
|
sample=sample,
|
||||||
)
|
)
|
||||||
|
# Driver picks the chunked-upload path.
|
||||||
assert driver.workload is not None
|
assert driver.workload is not None
|
||||||
# The workload's profile name encodes "real:..."
|
|
||||||
assert driver.workload.profile.startswith("real:")
|
assert driver.workload.profile.startswith("real:")
|
||||||
# Start cmd contains the b64 of our payload.
|
assert driver._chunked is not None
|
||||||
import base64 as _b64
|
assert driver._chunked.expected_sha256 == sha
|
||||||
assert _b64.b64encode(payload).decode("ascii")[:32] in driver.workload.start_cmd
|
|
||||||
|
|
||||||
|
def test_driver_walks_chunked_upload_in_session(tmp_path: Path) -> None:
    """Drive the phases up to ``infected_running`` and check the upload.

    Every data chunk, the finalize step and the exec must each arrive as
    its own shell_write on the fake client, and the emitted events must
    report the upload, the verification and a real-sample execution.
    """
    blob = b"\xde\xad\xbe\xef" * 4096  # 16 KiB → multiple chunks
    digest = hashlib.sha256(blob).hexdigest()
    staged = tmp_path / digest
    staged.write_bytes(blob)

    module_cfg = load_module_config(MODULES_DIR / "vsftpd_234_backdoor.toml")
    sample = Sample(
        name="real-multi",
        family="X",
        category="rat",
        profile="bursty-c2",
        sha256=digest,
    )

    # Patch the fake to return "sha-ok" so the verify step passes.
    client = FakeMSFRpcClient(sessions_after_fire={1: {"type": "shell"}})
    client._verify_response = "sha-ok\n"
    original_read = client.session_shell_read

    def shell_read_with_verify(sid):
        # Answer with the verify token only when the most recent write
        # was the finalize command (the one containing "sha256sum");
        # otherwise defer to the fake's own read.
        if client.shell_writes:
            latest = client.shell_writes[-1][1]
        else:
            latest = ""
        if "sha256sum" not in latest:
            return original_read(sid)
        return "sha-ok\n"

    client.session_shell_read = shell_read_with_verify  # type: ignore[assignment]

    events: list[tuple[str, dict]] = []

    def record(ev, **kw):
        events.append((ev, kw))

    driver_cfg = DriverConfig(
        target_ip="10.200.0.10",
        session_open_timeout_s=0.5,
        sample_store_root=tmp_path,
    )
    driver = MSFExploitDriver(
        client=client,  # type: ignore[arg-type]
        module=module_cfg,
        cfg=driver_cfg,
        emit_event=record,
        sample=sample,
    )
    driver.setup()
    for phase in ("armed", "infecting", "infected_running"):
        driver.set_phase(phase)

    # All chunks + finalize + exec went through shell_write.
    issued = [entry[1] for entry in client.shell_writes]
    n_printf = len([w for w in issued if w.startswith("printf '%s'")])
    n_finalize = len([w for w in issued if "sha256sum" in w])
    n_exec = len([w for w in issued if "nohup" in w and ".cis490-real" in w])
    assert n_printf >= 2, f"expected multiple chunks, saw {n_printf}"
    assert n_finalize == 1
    assert n_exec == 1

    # Events tell the same story.
    names = [name for name, _ in events]
    assert "real_binary_upload_begin" in names
    assert "real_binary_verify" in names
    assert any(
        name == "sample_executed" and kwargs.get("kind") == "real"
        for name, kwargs in events
    )
|
||||||
|
|
||||||
|
|
||||||
def test_driver_falls_back_to_mimic_when_real_binary_missing(tmp_path: Path) -> None:
|
def test_driver_falls_back_to_mimic_when_real_binary_missing(tmp_path: Path) -> None:
|
||||||
|
|
|
||||||
136
tools/index_reader.py
Normal file
136
tools/index_reader.py
Normal file
|
|
@ -0,0 +1,136 @@
|
||||||
|
"""Read + filter the receiver's ``index.jsonl``.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
# All episodes from one host:
|
||||||
|
cis490-index --host lab-host-1
|
||||||
|
|
||||||
|
# All episodes for a particular sample:
|
||||||
|
cis490-index --sample xmrig-cryptominer
|
||||||
|
|
||||||
|
# Today's episodes, sorted by size:
|
||||||
|
cis490-index --since 2026-04-30 --sort size
|
||||||
|
|
||||||
|
# Group/count by host:
|
||||||
|
cis490-index --count-by host_id
|
||||||
|
|
||||||
|
The index file is the closest thing to a database the receiver has
|
||||||
|
until we move to Postgres/Timescale. This tool is the temporary CLI
|
||||||
|
view over it; it's intentionally read-only and never opens episode
|
||||||
|
tarballs (just the index rows).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from collections import Counter
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_INDEX = "/var/lib/cis490/index.jsonl"
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_since(s: str) -> datetime:
|
||||||
|
# Accept ISO-8601 with or without time.
|
||||||
|
for fmt in ("%Y-%m-%dT%H:%M:%S%z", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"):
|
||||||
|
try:
|
||||||
|
dt = datetime.strptime(s, fmt)
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
|
return dt
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
# Last resort: fromisoformat which handles a wider range in 3.11+.
|
||||||
|
dt = datetime.fromisoformat(s)
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
|
return dt
|
||||||
|
|
||||||
|
|
||||||
|
def _row_time(row: dict) -> datetime | None:
|
||||||
|
s = row.get("received_at_wall")
|
||||||
|
if not s:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return datetime.fromisoformat(s.replace("Z", "+00:00"))
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _row_sample_name(row: dict) -> object:
    """Return ``row["meta"]["sample"]["name"]`` or ``None`` if absent.

    NOTE(review): assumes index rows embed the episode meta under a
    ``meta`` key, as the ``--sample`` help text describes — confirm
    against the receiver's index writer.
    """
    meta = row.get("meta")
    sample = meta.get("sample") if isinstance(meta, dict) else None
    return sample.get("name") if isinstance(sample, dict) else None


def main(argv: list[str] | None = None) -> int:
    """Filter the receiver index and print matching rows or grouped counts.

    Args:
        argv: argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 on success, 2 when the index file does not exist. An
        unparseable ``--since`` / ``--until`` ends the process through
        ``ArgumentParser.error`` (exit status 2).
    """
    p = argparse.ArgumentParser(prog="cis490-index")
    p.add_argument("--index", default=DEFAULT_INDEX,
                   help=f"path to index.jsonl (default {DEFAULT_INDEX})")
    p.add_argument("--host", help="only rows from this host_id")
    p.add_argument("--sample",
                   help="only rows whose meta.sample.name matches "
                        "(requires meta.json from a recent commit)")
    p.add_argument("--since", help="ISO date or datetime; only rows received on/after")
    p.add_argument("--until", help="ISO date or datetime; only rows received before")
    p.add_argument("--sort", choices=("time", "size", "host"), default="time")
    p.add_argument("--count-by",
                   choices=("host_id", "schema_version"),
                   help="instead of printing rows, group + count by this field")
    p.add_argument("--limit", type=int, default=0,
                   help="cap output rows (0 = all)")
    args = p.parse_args(argv)

    path = Path(args.index)
    if not path.exists():
        print(f"no index at {path}", file=sys.stderr)
        return 2

    # Report a malformed bound as a usage error instead of a traceback.
    try:
        since = _parse_since(args.since) if args.since else None
        until = _parse_since(args.until) if args.until else None
    except ValueError as exc:
        p.error(f"invalid --since/--until value: {exc}")

    rows: list[dict] = []
    with path.open(encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(row, dict):
                # Valid JSON but not an object (e.g. a bare list or number).
                continue
            if args.host and row.get("host_id") != args.host:
                continue
            # --sample used to be parsed but never applied; rows without
            # a sample name are dropped while the filter is active.
            if args.sample and _row_sample_name(row) != args.sample:
                continue
            if since or until:
                t = _row_time(row)
                if t is None:
                    continue
                if since and t < since:
                    continue
                if until and t >= until:
                    continue
            rows.append(row)

    if args.count_by:
        counts = Counter(r.get(args.count_by, "<missing>") for r in rows)
        for k, n in counts.most_common():
            print(f"{n:>6} {k}")
        return 0

    # "or" fallbacks keep an explicit null in a row from breaking the sort.
    sort_keys = {
        "time": lambda r: r.get("received_at_wall") or "",
        "size": lambda r: r.get("size_bytes") or 0,
        "host": lambda r: r.get("host_id") or "",
    }
    rows.sort(key=sort_keys[args.sort])
    if args.limit > 0:
        # Size-sorted output keeps the first N rows; every other ordering
        # keeps the last N.
        rows = rows[-args.limit:] if args.sort != "size" else rows[:args.limit]

    # Print TSV-ish for quick eyeballing + downstream pipe-friendliness.
    columns = ("received_at_wall", "host_id", "episode_id",
               "size_bytes", "schema_version", "sha256")
    print("\t".join(columns))
    for r in rows:
        print("\t".join(str(r.get(k, "")) for k in columns))
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
|
|
@ -1,8 +1,19 @@
|
||||||
"""Plot a single episode's envelope.
|
"""Plot a single episode's envelope.
|
||||||
|
|
||||||
Reads ``telemetry-proc.jsonl`` and ``labels.jsonl`` from an episode directory
|
Renders a multi-panel chart from whatever telemetry the episode dir
|
||||||
and renders a 3-panel chart: CPU%, RSS, IO write rate, with phase bands
|
contains, with phase bands underneath each panel:
|
||||||
underneath.
|
|
||||||
|
panel 1 — host /proc CPU% (source 1, always)
|
||||||
|
panel 2 — host /proc RSS (source 1, always)
|
||||||
|
panel 3 — host /proc IO write (source 1, always)
|
||||||
|
panel 4 — QMP block I/O ops (source 2, if telemetry-qmp.jsonl)
|
||||||
|
panel 5 — perf IPC + miss-rate (source 3, if telemetry-perf.jsonl)
|
||||||
|
panel 6 — bridge pcap pkts/s (source 4, if netflow.jsonl)
|
||||||
|
panel 7 — guest agent CPU/load (source 5, if telemetry-guest.jsonl)
|
||||||
|
|
||||||
|
Missing sources are silently skipped — a Tier-1 episode dir with only
|
||||||
|
proc telemetry still gets the original 3-panel plot. A Tier-3+ run
|
||||||
|
with all five sources gets the full stack on a shared time axis.
|
||||||
|
|
||||||
Two modes:
|
Two modes:
|
||||||
|
|
||||||
|
|
@ -103,21 +114,77 @@ def main() -> int:
|
||||||
end = labels[i + 1]["t_mono_ns"] / 1e9 if i + 1 < len(labels) else end_t
|
end = labels[i + 1]["t_mono_ns"] / 1e9 if i + 1 < len(labels) else end_t
|
||||||
spans.append((start, end, lbl["phase"]))
|
spans.append((start, end, lbl["phase"]))
|
||||||
|
|
||||||
fig, axes = plt.subplots(3, 1, figsize=(13, 8), sharex=True)
|
# Discover optional sources.
|
||||||
|
qmp_rows = _load_jsonl(d / "telemetry-qmp.jsonl") if (d / "telemetry-qmp.jsonl").exists() else []
|
||||||
|
perf_rows = _load_jsonl(d / "telemetry-perf.jsonl") if (d / "telemetry-perf.jsonl").exists() else []
|
||||||
|
netflow_rows = _load_jsonl(d / "netflow.jsonl") if (d / "netflow.jsonl").exists() else []
|
||||||
|
guest_rows = _load_jsonl(d / "telemetry-guest.jsonl") if (d / "telemetry-guest.jsonl").exists() else []
|
||||||
|
|
||||||
axes[0].plot(t, cpu_pct, color="#222222", linewidth=1.0)
|
panels: list[tuple[str, callable]] = [] # (ylabel, plot_fn(ax))
|
||||||
axes[0].set_ylabel("CPU %")
|
panels.append(("CPU % (proc)", lambda ax: (
|
||||||
axes[0].set_ylim(-3, 110)
|
ax.plot(t, cpu_pct, color="#222222", linewidth=1.0),
|
||||||
axes[0].grid(alpha=0.25)
|
ax.set_ylim(-3, 110),
|
||||||
|
)))
|
||||||
|
panels.append(("RSS (MiB)", lambda ax: ax.plot(t, rss_mib, color="#222222", linewidth=1.0)))
|
||||||
|
panels.append(("IO write (KiB/s)", lambda ax: ax.plot(t, io_kb_s, color="#222222", linewidth=1.0)))
|
||||||
|
|
||||||
axes[1].plot(t, rss_mib, color="#222222", linewidth=1.0)
|
if qmp_rows:
|
||||||
axes[1].set_ylabel("RSS (MiB)")
|
qt = [r["t_mono_ns"] / 1e9 for r in qmp_rows]
|
||||||
axes[1].grid(alpha=0.25)
|
# Sum block I/O ops across devices.
|
||||||
|
wr_ops = []
|
||||||
|
rd_ops = []
|
||||||
|
for r in qmp_rows:
|
||||||
|
bs = r.get("blockstats") or {}
|
||||||
|
wr_ops.append(sum(d.get("wr_ops", 0) for d in bs.values()))
|
||||||
|
rd_ops.append(sum(d.get("rd_ops", 0) for d in bs.values()))
|
||||||
|
panels.append(("QMP block ops (cum)", lambda ax: (
|
||||||
|
ax.plot(qt, wr_ops, color="#cc4444", linewidth=1.0, label="wr_ops"),
|
||||||
|
ax.plot(qt, rd_ops, color="#4488cc", linewidth=1.0, label="rd_ops"),
|
||||||
|
ax.legend(loc="upper left", fontsize=8),
|
||||||
|
)))
|
||||||
|
|
||||||
axes[2].plot(t, io_kb_s, color="#222222", linewidth=1.0)
|
if perf_rows:
|
||||||
axes[2].set_ylabel("IO write (KiB/s)")
|
pt = [r["t_mono_ns"] / 1e9 for r in perf_rows]
|
||||||
axes[2].set_xlabel("time (s)")
|
ipc = [r.get("ipc") or 0 for r in perf_rows]
|
||||||
axes[2].grid(alpha=0.25)
|
miss = [r.get("cache_miss_rate") or 0 for r in perf_rows]
|
||||||
|
panels.append(("perf IPC / miss-rate", lambda ax: (
|
||||||
|
ax.plot(pt, ipc, color="#222222", linewidth=1.0, label="IPC"),
|
||||||
|
ax.plot(pt, miss, color="#cc4444", linewidth=1.0, label="cache miss rate"),
|
||||||
|
ax.legend(loc="upper right", fontsize=8),
|
||||||
|
)))
|
||||||
|
|
||||||
|
if netflow_rows:
|
||||||
|
nt = [r["t_mono_ns"] / 1e9 for r in netflow_rows]
|
||||||
|
pkts = [(r.get("pkts_in", 0) + r.get("pkts_out", 0)) for r in netflow_rows]
|
||||||
|
synf = [r.get("syn_count", 0) for r in netflow_rows]
|
||||||
|
panels.append(("bridge pkts / SYNs (per 100 ms)", lambda ax: (
|
||||||
|
ax.plot(nt, pkts, color="#222222", linewidth=1.0, label="pkts"),
|
||||||
|
ax.plot(nt, synf, color="#cc4444", linewidth=1.0, label="syn"),
|
||||||
|
ax.legend(loc="upper right", fontsize=8),
|
||||||
|
)))
|
||||||
|
|
||||||
|
if guest_rows:
|
||||||
|
gt = [r["t_mono_ns"] / 1e9 for r in guest_rows]
|
||||||
|
load1 = [(r.get("load_1m_5m_15m") or [0])[0] for r in guest_rows]
|
||||||
|
mem_used = [
|
||||||
|
((r.get("mem_total_bytes") or 0) - (r.get("mem_available_bytes") or 0)) / (1024 * 1024)
|
||||||
|
for r in guest_rows
|
||||||
|
]
|
||||||
|
panels.append(("guest load1 / mem_used (MiB)", lambda ax: (
|
||||||
|
ax.plot(gt, load1, color="#222222", linewidth=1.0, label="load1"),
|
||||||
|
ax.twinx().plot(gt, mem_used, color="#4488cc", linewidth=1.0, label="mem MiB"),
|
||||||
|
)))
|
||||||
|
|
||||||
|
n = len(panels)
|
||||||
|
fig, axes = plt.subplots(n, 1, figsize=(13, 2 + 1.6 * n), sharex=True)
|
||||||
|
if n == 1:
|
||||||
|
axes = [axes]
|
||||||
|
|
||||||
|
for ax, (ylabel, plot_fn) in zip(axes, panels):
|
||||||
|
plot_fn(ax)
|
||||||
|
ax.set_ylabel(ylabel)
|
||||||
|
ax.grid(alpha=0.25)
|
||||||
|
axes[-1].set_xlabel("time (s)")
|
||||||
|
|
||||||
for ax in axes:
|
for ax in axes:
|
||||||
for start, end, phase in spans:
|
for start, end, phase in spans:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue