Root causes and fixes documented in TIER3-BRINGUP.md. Summary:
1. BRIDGE env var leaked into Tier-3 subprocess → target VM used tap
instead of SLIRP; fix: env.pop("BRIDGE") in fleet _run_slot.
2. usable_modules filter conditioned on BRIDGE presence → bridge-requiring
modules selected on SLIRP runs; fix: always filter requires_bridge.
3. cmd/unix/interact creates no session.list entry → session_open_timeout
every episode; fix: switch samba_usermap_script to cmd/unix/bind_perl.
4. Per-slot LPORT hostfwd used wrong guest port (host:5444→guest:4444);
fix: extra_host_port:extra_host_port mapping so guest binds the
per-slot LPORT directly.
5. vsftpd backdoor port 6200 hardcoded → collision across concurrent slots;
fix: requires_bridge=true filters it from SLIRP fleet runs.
6. SLIRP false-positive in _wait_for_tcp → exploit fires before Samba
boots (~60 s too early); fix: replace TCP probe with serial console
_wait_for_serial_login that waits for actual "login:" prompt.
7. Stale QEMU survives orchestrator restart (start_new_session=True) →
holds hostfwd ports, new QEMU silently fails; fix: kill by pgid from
old pidfile before rmtree.
8. PORT_BASE default used privileged port 21; fix: default to 2021+slot*100.
9. msfrpcd 6.x returns bytes for all string values even with raw=False;
fix: MSFRpcClient._str() recursive decoder applied to all responses.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
112 lines
3.8 KiB
Python
112 lines
3.8 KiB
Python
"""``cis490-fleet`` — run as many concurrent labeled episodes as the
host can handle, drawing samples from the manifest.

Modes:
  --capacity          Print the resource calculation and exit. No VMs spawned.
  --waves N           Run N waves of episodes (one wave = max_concurrent
                      episodes, each in its own slot). Default: 1.
  --max-concurrent N
                      Cap concurrency below the auto-detected ceiling.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import signal
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Allow running as a script.
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
from exploits.modules import load_module_configs # noqa: E402
|
|
from orchestrator.fleet import ( # noqa: E402
|
|
FleetConfig, FleetRunner, capacity_report, detect_capacity,
|
|
)
|
|
from samples.manifest import SampleManifest # noqa: E402
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for ``cis490-fleet``.

    Parses *argv* (argparse falls back to ``sys.argv[1:]`` when it is
    ``None``) and configures logging. With ``--capacity`` it prints the
    capacity report and exits. Otherwise it loads the sample manifest and
    any module configs, runs a ``FleetRunner`` for ``--waves`` episodes,
    and prints a JSON summary of every slot to stdout.

    Returns:
        0 when ``--capacity`` was requested or every slot in the result
        has ``rc == 0``; 1 otherwise.
    """
    # The repository root is two levels above this script; it anchors the
    # default manifest/modules paths and is handed to FleetConfig.
    project_root = Path(__file__).resolve().parent.parent

    parser = argparse.ArgumentParser(prog="cis490-fleet")
    parser.add_argument("--capacity", action="store_true")
    parser.add_argument("--waves", type=int, default=1)
    parser.add_argument("--max-concurrent", type=int, default=None)
    parser.add_argument(
        "--manifest",
        default=str(project_root / "samples" / "manifest.toml"),
    )
    parser.add_argument(
        "--modules-dir",
        default=str(project_root / "exploits" / "modules"),
    )
    parser.add_argument("--data-root", default="data")
    # FLEET_HOST_ID wins when set and non-empty; otherwise use the node name.
    parser.add_argument(
        "--host-id",
        default=os.environ.get("FLEET_HOST_ID") or os.uname().nodename,
    )
    parser.add_argument("--ram-per-vm-mib", type=int, default=320)
    parser.add_argument("--require-real-samples", action="store_true")
    parser.add_argument(
        "--force-tier2",
        action="store_true",
        help="Skip Tier 3 even when msfrpcd is reachable",
    )
    parser.add_argument(
        "--max-tier3-slots",
        type=int,
        default=None,
        help="Cap concurrent Tier-3 slots; slots >= N fall back to Tier-2",
    )
    parser.add_argument("--log-level", default="INFO")
    opts = parser.parse_args(argv)

    # Unknown level names fall back to INFO.
    level = getattr(logging, opts.log_level.upper(), logging.INFO)
    logging.basicConfig(
        level=level,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    # Capacity mode: report and leave before touching manifest or modules.
    if opts.capacity:
        print(capacity_report())
        return 0

    catalog = SampleManifest.load(opts.manifest)
    module_path = Path(opts.modules_dir)
    if module_path.exists():
        modules = load_module_configs(module_path)
    else:
        # A missing modules directory means no module configs are loaded.
        modules = {}

    runner = FleetRunner(
        FleetConfig(
            host_id=opts.host_id,
            repo_root=project_root,
            data_root=Path(opts.data_root).resolve(),
            manifest=catalog,
            modules=modules,
            ram_per_vm_mib=opts.ram_per_vm_mib,
            max_concurrent_override=opts.max_concurrent,
            require_real_samples=opts.require_real_samples,
            force_tier2=opts.force_tier2,
            max_tier3_slots=opts.max_tier3_slots,
        )
    )

    def _on_signal(signum, frame):  # noqa: ARG001
        # Ask the runner to stop when SIGTERM or SIGINT arrives.
        runner.stop()

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, _on_signal)

    outcome = runner.run(episodes=opts.waves)

    # Assemble the summary in a fixed key order so the JSON layout is stable.
    report = {"host_id": opts.host_id}
    report["capacity"] = outcome.capacity.to_dict()
    report["modules_loaded"] = sorted(modules)
    slot_rows = []
    for slot in outcome.slots:
        slot_rows.append(
            {
                "slot": slot.slot,
                "sample": slot.sample_name,
                "sample_kind": slot.sample_kind,
                "tier": slot.tier,
                "module": slot.module_name,
                "rc": slot.rc,
                "duration_s": slot.duration_s,
                "error": slot.error,
            }
        )
    report["slots"] = slot_rows
    report["total_duration_s"] = outcome.total_duration_s
    print(json.dumps(report, indent=2))

    # Any slot with a non-zero return code makes the whole run a failure.
    any_failed = any(slot.rc != 0 for slot in outcome.slots)
    return 1 if any_failed else 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI and use main()'s return value as the process exit status.
    raise SystemExit(main())
|