CIS490/exploits/modules.py
Elliott Kolden 667f042707 Tier-3 bring-up: 9 bugs fixed on elliott-ThinkPad (2026-05-01)
Root causes and fixes documented in TIER3-BRINGUP.md. Summary:

1. BRIDGE env var leaked into Tier-3 subprocess → target VM used tap
   instead of SLIRP; fix: env.pop("BRIDGE") in fleet _run_slot.

2. usable_modules filter conditioned on BRIDGE presence → bridge-requiring
   modules selected on SLIRP runs; fix: always filter requires_bridge.

3. cmd/unix/interact creates no session.list entry → session_open_timeout
   every episode; fix: switch samba_usermap_script to cmd/unix/bind_perl.

4. Per-slot LPORT hostfwd used wrong guest port (host:5444→guest:4444);
   fix: extra_host_port:extra_host_port mapping so guest binds the
   per-slot LPORT directly.

5. vsftpd backdoor port 6200 hardcoded → collision across concurrent slots;
   fix: requires_bridge=true filters it from SLIRP fleet runs.

6. SLIRP false-positive in _wait_for_tcp → exploit fires before Samba
   boots (~60 s too early); fix: replace TCP probe with serial console
   _wait_for_serial_login that waits for actual "login:" prompt.

7. Stale QEMU survives orchestrator restart (start_new_session=True) →
   holds hostfwd ports, new QEMU silently fails; fix: kill by pgid from
   old pidfile before rmtree.

8. PORT_BASE default used privileged port 21; fix: default to 2021+slot*100.

9. msfrpcd 6.x returns bytes for all string values even with raw=False;
   fix: MSFRpcClient._str() recursive decoder applied to all responses.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 12:26:19 -06:00

155 lines
6.2 KiB
Python

"""TOML loader for exploit-module configs.

Each ``exploits/modules/*.toml`` describes one Metasploit module — its
path, the options to set, the payload to use, and how the driver
should treat the resulting session. The driver consumes ``ModuleConfig``
objects; the TOML files are the on-disk source of truth.

Why TOML and not msfconsole ``.rc`` scripts? ``.rc`` scripts are
imperative and assume an interactive console; the driver needs the
*structured* options to push them through msfrpc. TOML is the simplest
way to express a small typed map of options — and it round-trips
cleanly into ``meta.json`` for episode reproducibility.

Per-(host, slot, episode) selection mirrors the sample-manifest
selector: we want different vulnerabilities exercised across hosts
and waves so the trained model sees a diverse corpus of
``armed → infecting`` transition shapes, not just the same FTP
backdoor every run.
"""
from __future__ import annotations

import hashlib
import re
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# Values accepted for ``module.type`` in a module TOML file;
# ``load_module_config`` raises ``ValueError`` for anything else.
_VALID_MODULE_TYPES = {"exploit", "auxiliary", "post"}
# Jinja-style ``{{ target_ip }}`` placeholder. Whitespace inside the braces
# is optional, so ``{{target_ip}}``, ``{{ target_ip }}`` and mixed forms such
# as ``{{ target_ip}}`` are all recognised.
_TARGET_IP_PLACEHOLDER = re.compile(r"\{\{\s*target_ip\s*\}\}")


def _substitute_target_ip(value: Any, target_ip: str) -> Any:
    """Return ``value`` with every ``target_ip`` placeholder filled in.

    Non-string values, and strings that contain no ``{{``, are returned
    unchanged.
    """
    if isinstance(value, str) and "{{" in value:
        # A callable replacement inserts ``target_ip`` verbatim; a plain
        # replacement string would have its backslashes interpreted by re.sub.
        return _TARGET_IP_PLACEHOLDER.sub(lambda _match: target_ip, value)
    return value


@dataclass(frozen=True)
class ModuleConfig:
    """One Metasploit module as described by a module TOML file.

    ``frozen=True`` prevents attributes from being rebound after
    construction; the option dicts themselves are still ordinary mutable
    dicts.
    """

    # Short id, e.g. "vsftpd_234_backdoor".
    name: str
    # "exploit" | "auxiliary" | "post".
    module_type: str
    # e.g. "unix/ftp/vsftpd_234_backdoor".
    module_path: str
    # Module options; string values may contain ``{{ target_ip }}``.
    options: dict[str, Any] = field(default_factory=dict)
    # e.g. "cmd/unix/interact"; ``None`` when no payload is configured.
    payload_path: str | None = None
    # Payload options; rendered with the same placeholder rules as ``options``.
    payload_options: dict[str, Any] = field(default_factory=dict)
    # What we'll get on success.
    expected_session_type: str = "shell"
    description: str = ""
    # When true the module's payload uses a callback channel (reverse
    # or bind shell) and won't land a session under SLIRP+restrict=on.
    # The fleet runner skips these on SLIRP (non-bridged) runs so that
    # episodes which fire them actually produce data.
    requires_bridge: bool = False
    # Guest ports the fleet must also hostfwd (in addition to RPORT).
    # Used for bind-shell payloads where the handler connects to a
    # separate port. Fleet calculates per-slot host ports and sets
    # FLEET_PAYLOAD_LPORT so the driver can override LPORT at fire time.
    extra_target_ports: tuple[int, ...] = ()

    def render_options(self, *, target_ip: str) -> dict[str, Any]:
        """Build the flat option map to hand to Metasploit for one episode.

        Module configs use Jinja-style placeholders for any value that
        isn't known until episode time (RHOSTS, LHOST, etc.). Today the
        only supported placeholder is ``target_ip``; if more are needed
        later, generalize ``_substitute_target_ip``.

        Args:
            target_ip: Address substituted for every ``{{ target_ip }}``
                placeholder (whitespace inside the braces is optional).

        Returns:
            A new dict holding, in order: the rendered module options,
            ``PAYLOAD`` (only when ``payload_path`` is set), then the
            rendered payload options. Later entries overwrite earlier ones
            on key collisions. The config's own dicts are not modified.
        """
        rendered: dict[str, Any] = {
            key: _substitute_target_ip(value, target_ip)
            for key, value in self.options.items()
        }
        # MSF requires PAYLOAD as a top-level option even though we
        # carry it in a separate field on the config.
        if self.payload_path:
            rendered["PAYLOAD"] = self.payload_path
        for key, value in self.payload_options.items():
            rendered[key] = _substitute_target_ip(value, target_ip)
        return rendered
def load_module_config(path: Path) -> ModuleConfig:
raw = tomllib.loads(path.read_text())
mod = raw.get("module") or {}
module_path = mod.get("path")
module_type = mod.get("type", "exploit")
if not isinstance(module_path, str) or not module_path:
raise ValueError(f"{path}: module.path must be a non-empty string")
if module_type not in _VALID_MODULE_TYPES:
raise ValueError(
f"{path}: module.type {module_type!r} not in {_VALID_MODULE_TYPES}"
)
options = (raw.get("module", {}).get("options") or {}) | (raw.get("options") or {})
payload = raw.get("payload") or {}
return ModuleConfig(
name=path.stem,
module_type=module_type,
module_path=module_path,
options=dict(options),
payload_path=payload.get("path"),
payload_options=dict(payload.get("options") or {}),
expected_session_type=raw.get("session", {}).get("type", "shell"),
description=raw.get("description", ""),
requires_bridge=bool(raw.get("runtime", {}).get("requires_bridge", False)),
extra_target_ports=tuple(
int(p) for p in raw.get("runtime", {}).get("extra_target_ports", [])
),
)
def load_module_configs(directory: Path) -> dict[str, ModuleConfig]:
    """Build the module catalog from the ``*.toml`` files in ``directory``.

    Only files directly inside ``directory`` are considered (the glob is
    not recursive). Files are visited in sorted path order and each entry
    is keyed by its file name without the suffix. A directory with no
    matching files yields an empty dict.
    """
    catalog: dict[str, ModuleConfig] = {}
    for toml_path in sorted(directory.glob("*.toml")):
        catalog[toml_path.stem] = load_module_config(toml_path)
    return catalog
def select_module(
    catalog: dict[str, ModuleConfig],
    *,
    host_id: str,
    slot: int,
    episode_index: int,
) -> ModuleConfig:
    """Pick one module deterministically for a (host, slot, episode) tuple.

    The tuple is rendered as ``module|<host_id>|<slot>|<episode_index>``,
    hashed with SHA-256, and the first eight digest bytes (big-endian)
    index into the alphabetically sorted catalog names. The same inputs
    and the same set of catalog names therefore always select the same
    module, so runs replay identically.

    The pick is hash-driven rather than a strict rotation: different hosts
    usually land on different modules at the same slot/episode, but a host
    is not guaranteed to visit every module within ``len(catalog)``
    episodes. Per the original author this mirrors
    ``SampleManifest.select()`` (not visible from this module).

    Raises:
        ValueError: ``catalog`` is empty.
    """
    if not catalog:
        raise ValueError("module catalog is empty")

    # Sort so the result does not depend on dict insertion order.
    ordered_names = sorted(catalog)
    digest = hashlib.sha256(
        f"module|{host_id}|{slot}|{episode_index}".encode()
    ).digest()
    pick = int.from_bytes(digest[:8], "big") % len(ordered_names)
    return catalog[ordered_names[pick]]
def module_target_port(module: ModuleConfig) -> int | None:
    """Pull the RPORT off a module config.

    Used by the fleet runner to wire the launcher's hostfwd to the right
    service inside the target VM (vsftpd:21, samba:139, php-cgi:80,
    distccd:3632, unrealircd:6667).

    Args:
        module: Config whose ``options["RPORT"]`` is inspected.

    Returns:
        The port as an ``int`` when ``RPORT`` is an integer or a string of
        digits; ``None`` when it is absent, a boolean, or anything else
        that cannot be read as a port number.
    """
    rport = module.options.get("RPORT")
    # bool is a subclass of int; ``RPORT = true`` is a config mistake, not
    # a request for port 1.
    if isinstance(rport, bool):
        return None
    if isinstance(rport, int):
        return rport
    if isinstance(rport, str) and rport.isdigit():
        try:
            return int(rport)
        except ValueError:
            # str.isdigit() accepts characters such as superscript digits
            # that int() cannot parse; treat those as "no usable port".
            return None
    return None