fix: revert speculative fleet picker change — was producing dishonest labels

Empirical evidence from k-gamingcom (commit 4ab5477, 2026-05-03 22:20Z
vsftpd_234_backdoor episode): the picker selected vsftpd because BRIDGE
was set on that host. The exploit fires against target_ip=127.0.0.1
(SLIRP loopback) but vsftpd's hardcoded port-6200 backdoor is reachable
only at the guest's bridge IP. Result: session_open_timeout, AND a
schedule-clock-driven `infected_running` label was still written for
the failed exploit — exactly the §10 poisoned-training-example pattern.

Until guest-IP discovery for bridge mode is wired (a separate piece of
infrastructure), bridge-only modules can't actually reach their target
even when the operator sets BRIDGE for Tier-2's pcap source. Revert
the picker to its prior conservative form: drop requires_bridge modules
unconditionally, regardless of BRIDGE state. The same applies to the
BRIDGE env strip in the Tier-3 launch path: stripping it unconditionally
was the correct behavior.

Replaces the two aspirational tests
(test_fleet_uses_all_modules_when_bridge_set,
test_fleet_propagates_bridge_env_to_runner) with their honest negatives
(test_tier3_drops_requires_bridge_modules_unconditionally,
test_tier3_strips_bridge_env_even_when_set). The previous tests asserted
behavior the rest of the pipeline can't deliver; they were false signals.

229 passed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Max Gorog 2026-05-03 17:58:43 -05:00
parent ac7b85ff8d
commit 0390eb20b6
2 changed files with 66 additions and 45 deletions

View file

@ -244,20 +244,25 @@ def _run_slot(
# Decide tier. # Decide tier.
# Tier-3 modules split into two classes by `requires_bridge`: # Tier-3 modules split into two classes by `requires_bridge`:
# - bind/reverse-shell payloads under SLIRP need only loopback # - SLIRP-friendly bind shells like samba_usermap_script's
# hostfwd (samba_usermap_script with bind_perl, etc.). # cmd/unix/bind_perl (handler connects in over hostfwd).
# - modules with hardcoded callback ports or guest-driven # - Bridge-only modules (vsftpd's port-6200 backdoor, distccd,
# callbacks (vsftpd's port-6200 backdoor, distccd, php_cgi, # php_cgi, unreal_ircd) where the handler must reach the
# unreal_ircd) need a bridge so each guest gets its own IP. # guest at its own bridge IP.
# When the operator sets BRIDGE (= bridge configured + tap # The bridge-only set is filtered out unconditionally because the
# available), every module is usable. Without BRIDGE we drop the # rest of the pipeline currently passes target_ip=127.0.0.1 (SLIRP
# bridge-only ones — running them under SLIRP would either fail # loopback) regardless of bridge mode, so bridge-only modules
# to land or collide on shared loopback ports across slots. # land in target_ip mismatches that produce session_open_timeout
bridge_set = bool(os.environ.get("BRIDGE")) # AND a dishonest infected_running label (PIPELINE.md §10). When
# target-IP discovery from the guest's bridge lease lands, this
# filter can be made conditional on `bridge_set` again. See the
# 2026-05-03 vsftpd_234_backdoor episode (commit 4ab5477) on
# k-gamingcom for the empirical evidence the conditional version
# produced poisoned labels.
usable_modules: dict[str, ModuleConfig] = ( usable_modules: dict[str, ModuleConfig] = (
dict(cfg.modules) if bridge_set {k: v for k, v in cfg.modules.items() if not v.requires_bridge}
else {k: v for k, v in cfg.modules.items() if not v.requires_bridge} if cfg.modules else {}
) if cfg.modules else {} )
tier3_ready = ( tier3_ready = (
not cfg.force_tier2 not cfg.force_tier2
and bool(usable_modules) and bool(usable_modules)
@ -309,15 +314,13 @@ def _run_slot(
target_ports += f",{extra_host_port}:{extra_host_port}" target_ports += f",{extra_host_port}:{extra_host_port}"
env["FLEET_PAYLOAD_LPORT"] = str(extra_host_port) env["FLEET_PAYLOAD_LPORT"] = str(extra_host_port)
env["TARGET_PORTS"] = target_ports env["TARGET_PORTS"] = target_ports
# When BRIDGE is unset, force SLIRP+hostfwd; when it IS set we # Tier-3 always uses SLIRP+hostfwd. Strip BRIDGE so a host that
# keep it so requires_bridge modules (vsftpd backdoor on the # has BRIDGE set for Tier-2 (pcap source 4) doesn't accidentally
# hardcoded port 6200, distccd, etc.) can reach the guest via # propagate it into the Tier-3 launch_target.sh, which would try
# its own bridge IP. Refs Bug 1 in TIER3-BRINGUP.md (BRIDGE # tap mode without the matching guest-IP discovery wired (see
# leaking from Tier-2 into Tier-3 broke things) — that fix was # the usable_modules comment above for the matching reason this
# too aggressive; it stripped BRIDGE even when the module # has to stay strict).
# legitimately needed it. env.pop("BRIDGE", None)
if not bridge_set:
env.pop("BRIDGE", None)
cmd = [ cmd = [
py, py,
str(cfg.repo_root / "tools" / "run_tier3_demo.py"), str(cfg.repo_root / "tools" / "run_tier3_demo.py"),

View file

@ -323,29 +323,19 @@ def test_fleet_skips_requires_bridge_modules_when_no_bridge(monkeypatch, tmp_pat
f"selected callback modules without BRIDGE: {seen_modules & callback_modules}" f"selected callback modules without BRIDGE: {seen_modules & callback_modules}"
def test_fleet_uses_all_modules_when_bridge_set(monkeypatch, tmp_path) -> None: def test_tier3_strips_bridge_env_even_when_set(monkeypatch, tmp_path) -> None:
"""With BRIDGE set, the full catalog (including reverse/bind shell """Tier-3 always uses SLIRP+hostfwd because the rest of the pipeline
payloads) is in rotation.""" passes target_ip=127.0.0.1 regardless of bridge mode (no guest-IP
from orchestrator import fleet discovery wired). If BRIDGE leaks into launch_target.sh's env, the
cfg = _fleet_cfg_with_modules(tmp_path) target VM goes into tap mode without the matching IP discovery and
monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True) every exploit times out against 127.0.0.1 producing dishonest
monkeypatch.setenv("BRIDGE", "br-malware") infected_running labels (PIPELINE.md §10). Strip BRIDGE from the
_patch_subprocess(monkeypatch) Tier-3 subprocess env even when the operator set it for Tier-2.
capacity = fleet.detect_capacity()
sample = cfg.manifest.samples[0] Regression for: 2026-05-03 vsftpd_234_backdoor episode on
seen = set() k-gamingcom (commit 4ab5477) picker selected vsftpd because
for ep in range(40): BRIDGE was set, episode timed out, schedule-clock wrote
res = fleet._run_slot(cfg, slot=0, sample=sample, episode_index=ep, capacity=capacity) `infected_running` for an exploit that never landed."""
if res.tier == "tier3" and res.module_name:
seen.add(res.module_name)
assert seen == set(cfg.modules.keys()), \
f"only saw {seen}/{set(cfg.modules.keys())}"
def test_fleet_propagates_bridge_env_to_runner(monkeypatch, tmp_path) -> None:
"""When BRIDGE is set in the parent env, the per-slot subprocess
env must carry it through so launch_target.sh enters tap+bridge mode."""
from orchestrator import fleet from orchestrator import fleet
cfg = _fleet_cfg_with_modules(tmp_path) cfg = _fleet_cfg_with_modules(tmp_path)
monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True) monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
@ -354,7 +344,35 @@ def test_fleet_propagates_bridge_env_to_runner(monkeypatch, tmp_path) -> None:
capacity = fleet.detect_capacity() capacity = fleet.detect_capacity()
sample = cfg.manifest.samples[0] sample = cfg.manifest.samples[0]
fleet._run_slot(cfg, slot=0, sample=sample, episode_index=0, capacity=capacity) fleet._run_slot(cfg, slot=0, sample=sample, episode_index=0, capacity=capacity)
assert _RecordingPopen.calls[-1]["env"]["BRIDGE"] == "br-malware" assert "BRIDGE" not in _RecordingPopen.calls[-1]["env"]
def test_tier3_drops_requires_bridge_modules_unconditionally(monkeypatch, tmp_path) -> None:
    """Picker MUST drop requires_bridge modules even when BRIDGE is set.

    The rest of the pipeline can't actually use them yet (no guest-IP
    discovery for bridge mode). Until that's wired, including them
    produces session_open_timeout + dishonest labels.

    Asserts the picker only ever returns the SLIRP-friendly subset
    across many episodes regardless of BRIDGE state, and that at least
    one Tier-3 module was actually picked, so the subset check cannot
    pass vacuously when Tier-3 never runs.
    """
    from orchestrator import fleet

    cfg = _fleet_cfg_with_modules(tmp_path)
    # Force the msfrpcd availability probe to report success.
    monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
    # BRIDGE is deliberately set: the picker must ignore it.
    monkeypatch.setenv("BRIDGE", "br-malware")
    # NOTE(review): presumably stubs out subprocess launching so no real
    # runner is spawned -- confirm against the helper's definition.
    _patch_subprocess(monkeypatch)
    capacity = fleet.detect_capacity()
    slirp_friendly = {k for k, v in cfg.modules.items() if not v.requires_bridge}
    sample = cfg.manifest.samples[0]

    seen = set()
    for ep in range(40):
        res = fleet._run_slot(cfg, slot=0, sample=sample,
                              episode_index=ep, capacity=capacity)
        if res.tier == "tier3" and res.module_name:
            seen.add(res.module_name)

    # An empty set is a subset of everything; without this guard the test
    # would stay green even if no episode ever reached Tier-3.
    assert seen, (
        "no Tier-3 module was picked in 40 episodes; "
        "the subset assertion below would pass vacuously"
    )
    assert seen <= slirp_friendly, (
        f"picker returned bridge-only modules {seen - slirp_friendly}; "
        f"these can't reach the guest with target_ip=127.0.0.1"
    )
def test_fleet_assigns_unique_port_base_per_slot(monkeypatch, tmp_path) -> None: def test_fleet_assigns_unique_port_base_per_slot(monkeypatch, tmp_path) -> None: