This is the chunk that makes "real data" actually flow on multiple
hosts in parallel. End-to-end pipe was up at 613c6fa / 2579683; now
the lab-host side has the diversity + concurrency it needs.
Collectors landed:
collectors/qmp.py — source 2 (oracle). Tiny synchronous QMP
client + row builder + run loop. Tolerates
older QEMU builds that lack query-stats.
collectors/guest_agent.py — source 5 (deployable). Reads the
virtio-serial host-side socket, parses
agent JSON-lines, re-stamps to the host
monotonic clock, persists.
collectors/pcap.py — source 4 (deployable). tcpdump capture
+ pure-Python pcap reader + 100 ms
netflow.jsonl bucketizer. Decodes
Ethernet/IPv4/TCP/UDP enough for the
schema in docs/data-model.md.
In-guest agent:
vm/guest-agent/cis490_agent.py — stdlib-only Python agent. Reads
/proc/{stat,meminfo,loadavg,net/dev,net/tcp*}, top-N RSS procs,
thermal. Writes JSON-lines to /dev/virtio-ports/cis490.guest.agent.
tools/build_cidata.py — embeds the agent + an OpenRC service into
user-data so first boot of the Alpine cidata image auto-starts it.
Launchers:
vm/launch_demo.sh / launch_target.sh — second virtio-serial port for
the agent socket; SLOT env support so multiple VMs run without
socket / port collisions; PORT_BASE on launch_target so multiple
target VMs hostfwd different host ports.
vm/setup_bridge.sh — creates host-only br-malware (10.200.0.1/24,
no NAT). Idempotent.
Fleet:
orchestrator/fleet.py — capacity detector (cores / RAM / load
headroom) + concurrent-slot runner. Per-slot ENV selects the
sample. FleetCapacity dataclass round-trips into meta.json so
"this episode ran with 6 concurrent VMs" is auditable post-hoc.
tools/run_fleet.py — CLI: --capacity report; --waves N runs N
waves of (max_concurrent) episodes each, every slot with a
different sample.
etc/cis490-orchestrator.service — now drives the fleet runner with
Restart=always so each invocation runs one wave and respawns,
giving a continuous stream.
Samples:
samples/manifest.toml — six profiles spanning the five major
behaviour shapes. Each entry is real OR mimic (sha256 distinguishes).
samples/manifest.py — strict TOML loader (rejects dups, unknown
categories) + deterministic select(host_id, slot, episode_index)
so different hosts on the network walk the catalog in different
orders without any coordinator.
EpisodeRunner:
orchestrator/episode.py — optional qmp_socket + guest_agent_socket
fields on EpisodeConfig; when set, additional collector threads
run alongside proc_qemu. EpisodeResult now carries rows_qmp +
rows_guest counters.
Tier-3 setup automation:
scripts/install-msfrpcd.sh — installs metasploit-framework where
the package manager has it, generates a strong password into
/etc/cis490/msfrpc.env, drops a hardened systemd unit bound to
127.0.0.1:55553. After this, run_tier3_demo.py works zero-touch
once MSFRPC_PASSWORD is sourced.
scripts/fetch-metasploitable2.sh — accepts IMAGE_URL + IMAGE_SHA256
from the operator (Rapid7 download is registration-walled), pulls,
verifies, converts vmdk → qcow2, lands at vm/images/.
Tests: 82 pass (was 51). New suites:
tests/test_qmp.py — fake QMP server, capability handshake,
blockstats, async-event interleaving,
5-failure backoff
tests/test_guest_agent.py — fake virtio socket, JSON-lines read +
re-stamp, malformed-line tolerance
tests/test_pcap.py — synthetic pcap with TCP/UDP/ARP frames,
bucketize correctness across windows
tests/test_fleet.py — capacity math (8-core idle / low-RAM /
high-load / Pi5 / 1-core box), manifest
selection determinism + diversity
What's queued for the next commit (already discussed in conversation):
- MSFExploitDriver v2: map sample.profile → distinct in-session
workload so Tier-3 episodes don't all produce the same yes-loop
envelope. Critical for ML to learn varied malware shapes.
- Real-sample fetch from MalwareBazaar by sha256.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
204 lines
6.9 KiB
Python
204 lines
6.9 KiB
Python
"""Tests for fleet capacity calculation + sample manifest selection.
|
|
|
|
Capacity is unit-tested via deterministic monkeypatching of /proc and
|
|
os.cpu_count so the math is exercised independently of the host
|
|
running the suite. Sample selection has its own tests covering the
|
|
"different hosts pick different samples" property.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from orchestrator import fleet
|
|
from samples.manifest import Sample, SampleManifest
|
|
|
|
|
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Capacity
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _patch_capacity_inputs(
    monkeypatch,
    *,
    cores: int,
    ram_total_mib: int,
    ram_available_mib: int,
    load_1m: float = 0.0,
) -> None:
    """Replace the host probes on ``fleet`` with fixed, caller-chosen values.

    Patches ``fleet.os.cpu_count``, ``fleet._read_meminfo`` and
    ``fleet._read_loadavg`` so that a subsequent capacity calculation
    sees the synthetic host described by the arguments instead of the
    machine running the suite.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture (anything exposing
            ``setattr(target, name, value)``).
        cores: value the patched ``cpu_count`` returns.
        ram_total_mib: total RAM in MiB; stored under ``"MemTotal"``
            after multiplying by 1024 * 1024.
        ram_available_mib: available RAM in MiB; stored under
            ``"MemAvailable"`` after the same scaling.
        load_1m: value the patched ``_read_loadavg`` returns.
    """
    mib = 1024 * 1024
    meminfo = {
        "MemTotal": ram_total_mib * mib,
        "MemAvailable": ram_available_mib * mib,
    }

    def _fake_cpu_count():
        return cores

    def _fake_meminfo():
        # Hand out a fresh dict on every call so no caller can observe
        # another caller's mutations.
        return dict(meminfo)

    def _fake_loadavg():
        return load_1m

    monkeypatch.setattr(fleet.os, "cpu_count", _fake_cpu_count)
    monkeypatch.setattr(fleet, "_read_meminfo", _fake_meminfo)
    monkeypatch.setattr(fleet, "_read_loadavg", _fake_loadavg)
|
|
|
|
|
|
def test_capacity_8core_idle_box(monkeypatch) -> None:
    """An idle 8-core host with ample RAM is limited by its core count."""
    _patch_capacity_inputs(
        monkeypatch,
        cores=8,
        ram_total_mib=16384,
        ram_available_mib=14000,
    )

    capacity = fleet.detect_capacity(ram_per_vm_mib=320)

    assert capacity.cores_total == 8
    # One core is expected to be held back: 8 // 8 = 1.
    assert capacity.cores_reserved == 1
    assert capacity.max_by_cores == 7
    # RAM is plentiful and load is zero, so cores are the binding limit.
    assert capacity.max_concurrent == 7
    assert "binding=cores" in capacity.rationale
|
|
|
|
|
|
def test_capacity_low_ram_caps_below_cores(monkeypatch) -> None:
    """With 8 cores but only ~2 GiB available, RAM is the tighter limit."""
    _patch_capacity_inputs(
        monkeypatch,
        cores=8,
        ram_total_mib=4096,
        ram_available_mib=2048,
    )

    capacity = fleet.detect_capacity(ram_per_vm_mib=320)

    # Expected arithmetic:
    #   headroom   = max(1024, 4096 // 8)   = 1024
    #   max_by_ram = (2048 - 1024) // 320   = 3
    assert capacity.max_by_ram == 3
    assert capacity.max_concurrent == 3
|
|
|
|
|
|
def test_capacity_high_load_halves_concurrency(monkeypatch) -> None:
    """A busy host (load_1m / cores > 0.75) gets half the core-based limit."""
    # 7 / 8 = 0.875, which is above the 0.75 threshold.
    busy_load = 7.0
    _patch_capacity_inputs(
        monkeypatch,
        cores=8,
        ram_total_mib=16384,
        ram_available_mib=14000,
        load_1m=busy_load,
    )

    capacity = fleet.detect_capacity(ram_per_vm_mib=320)

    # Expected arithmetic: max_by_cores = 7, max_by_load = max(1, 7 // 2) = 3.
    assert capacity.max_by_load == 3
    assert capacity.max_concurrent == 3
|
|
|
|
|
|
def test_capacity_pi5_class(monkeypatch) -> None:
    """A 4-core, 8 GiB host keeps one core back and runs 3 VMs at once."""
    _patch_capacity_inputs(
        monkeypatch,
        cores=4,
        ram_total_mib=7951,
        ram_available_mib=5223,
    )

    capacity = fleet.detect_capacity(ram_per_vm_mib=320)

    assert capacity.cores_total == 4
    assert capacity.max_concurrent == 3
|
|
|
|
|
|
def test_capacity_minimal_box(monkeypatch) -> None:
    """A single-core host with 1 GiB of RAM must report zero VM slots."""
    _patch_capacity_inputs(
        monkeypatch,
        cores=1,
        ram_total_mib=1024,
        ram_available_mib=512,
    )

    capacity = fleet.detect_capacity(ram_per_vm_mib=320)

    assert capacity.max_concurrent == 0
|
|
|
|
|
|
def test_capacity_to_dict_round_trips(monkeypatch) -> None:
    """``to_dict()`` exposes the fields recorded for post-hoc auditing."""
    _patch_capacity_inputs(
        monkeypatch,
        cores=4,
        ram_total_mib=8000,
        ram_available_mib=6000,
    )
    capacity = fleet.detect_capacity(ram_per_vm_mib=320)

    as_dict = capacity.to_dict()

    assert as_dict["cores_total"] == 4
    # The serialized value must agree with the attribute on the object.
    assert as_dict["max_concurrent"] == capacity.max_concurrent
    assert "rationale" in as_dict
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Sample manifest
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_repo_manifest_loads() -> None:
    """The checked-in manifest parses and every entry is fully populated."""
    manifest_path = REPO_ROOT / "samples" / "manifest.toml"
    manifest = SampleManifest.load(manifest_path)

    assert len(manifest) >= 4

    # No required field may be empty on any entry.
    for entry in manifest.samples:
        assert all((entry.name, entry.family, entry.category, entry.profile))

    # The catalog is mimic-only today; revisit once real samples are added.
    assert all(entry.kind == "mimic" for entry in manifest.samples)
|
|
|
|
|
|
def test_selection_is_deterministic() -> None:
    """Identical (host_id, slot, episode_index) inputs pick the same sample."""
    manifest = SampleManifest.load(REPO_ROOT / "samples" / "manifest.toml")
    key = {"host_id": "lab-1", "slot": 2, "episode_index": 5}

    first = manifest.select(**key)
    second = manifest.select(**key)

    # Identity check: both calls must hand back the very same object.
    assert first is second
|
|
|
|
|
|
def test_selection_differs_across_hosts() -> None:
    """Different host ids should mostly land on different samples.

    The property is probabilistic, so the test bounds the number of
    collisions over a batch of slots rather than requiring any single
    pair to differ.
    """
    manifest = SampleManifest.load(REPO_ROOT / "samples" / "manifest.toml")
    if len(manifest) < 2:
        pytest.skip("manifest too small for diversity check")

    # Count the slots on which "alice" and "bob" receive the same object.
    matches = sum(
        1
        for slot in range(20)
        if manifest.select(host_id="alice", slot=slot, episode_index=0)
        is manifest.select(host_id="bob", slot=slot, episode_index=0)
    )

    # With N samples the naive collision rate is ~1/N, i.e. about 20/N
    # matches over 20 trials (~5 for N = 4). The bound of 15 (three
    # quarters of the trials) leaves generous slack and only trips when
    # host_id has little or no influence on the pick.
    assert matches < 15, "host_id seed isn't producing variety"
|
|
|
|
|
|
def test_selection_walks_catalog_across_episodes() -> None:
    """Over 200 episodes, one host/slot pair must visit every sample."""
    manifest = SampleManifest.load(REPO_ROOT / "samples" / "manifest.toml")

    # Distinct sample names picked across the episode range.
    seen = {
        manifest.select(host_id="lab-x", slot=0, episode_index=episode).name
        for episode in range(200)
    }

    assert len(seen) == len(manifest), f"only saw {len(seen)}/{len(manifest)} samples"
|
|
|
|
|
|
def test_manifest_rejects_missing_required_field(tmp_path: Path) -> None:
    """Loading an entry without ``category`` raises ValueError naming it."""
    manifest_file = tmp_path / "bad.toml"
    # The ``category`` key is deliberately left out (only a TOML comment).
    toml_text = (
        '[[sample]]\n'
        'name = "x"\n'
        'family = "y"\n'
        '# missing category\n'
        'profile = "z"\n'
    )
    manifest_file.write_text(toml_text)

    with pytest.raises(ValueError, match="category"):
        SampleManifest.load(manifest_file)
|
|
|
|
|
|
def test_manifest_rejects_unknown_category(tmp_path: Path) -> None:
    """An entry whose ``category`` is ``"fish"`` raises ValueError."""
    manifest_file = tmp_path / "bad.toml"
    # "fish" is the value the loader is expected to refuse.
    toml_text = (
        '[[sample]]\n'
        'name = "x"\n'
        'family = "y"\n'
        'category = "fish"\n'
        'profile = "z"\n'
    )
    manifest_file.write_text(toml_text)

    with pytest.raises(ValueError, match="category"):
        SampleManifest.load(manifest_file)
|
|
|
|
|
|
def test_manifest_rejects_duplicate_names(tmp_path: Path) -> None:
    """Two entries sharing one ``name`` raise ValueError mentioning "duplicate"."""
    manifest_file = tmp_path / "dup.toml"
    # The same entry is written twice, separated by a blank line.
    entry = (
        '[[sample]]\n'
        'name = "x"\nfamily = "y"\ncategory = "rat"\nprofile = "z"\n'
    )
    manifest_file.write_text(entry + '\n' + entry)

    with pytest.raises(ValueError, match="duplicate"):
        SampleManifest.load(manifest_file)
|
|
|
|
|
|
def test_manifest_marks_real_when_sha256_present(tmp_path: Path) -> None:
    """An entry with ``sha256`` loads as kind "real"; one without as "mimic"."""
    manifest_file = tmp_path / "real.toml"
    with_hash = (
        '[[sample]]\n'
        'name = "real-one"\nfamily = "y"\ncategory = "rat"\nprofile = "z"\n'
        'sha256 = "abc123"\n'
    )
    without_hash = (
        '\n[[sample]]\n'
        'name = "mimic-one"\nfamily = "y"\ncategory = "rat"\nprofile = "z"\n'
    )
    manifest_file.write_text(with_hash + without_hash)

    manifest = SampleManifest.load(manifest_file)

    # Index the loaded entries by name so each can be checked directly.
    kinds = {sample.name: sample.kind for sample in manifest.samples}
    assert kinds["real-one"] == "real"
    assert kinds["mimic-one"] == "mimic"
|