fix: revert speculative fleet picker change — was producing dishonest labels

Empirical evidence from k-gamingcom (commit 4ab5477, 2026-05-03 22:20Z vsftpd_234_backdoor episode): the picker selected vsftpd because BRIDGE was set on that host. The exploit fires against target_ip=127.0.0.1 (SLIRP loopback), but vsftpd's hardcoded port-6200 backdoor is reachable only at the guest's bridge IP. Result: session_open_timeout, AND a schedule-clock-driven `infected_running` label was still written for the failed exploit — exactly the PIPELINE.md §10 poisoned-training-example pattern. Until guest-IP discovery for bridge mode is wired (a separate piece of infrastructure), bridge-only modules can't actually reach their target even when the operator sets BRIDGE for Tier-2's pcap source. Revert the picker to its prior conservative form: drop requires_bridge modules unconditionally, whatever the BRIDGE state. The same applies to the BRIDGE env strip in the Tier-3 launch path: the original unconditional strip was correct, so it is restored. Replaces the two aspirational tests (test_fleet_uses_all_modules_when_bridge_set, test_fleet_propagates_bridge_env_to_runner) with their honest negatives (test_tier3_drops_requires_bridge_modules_unconditionally, test_tier3_strips_bridge_env_even_when_set). The previous tests asserted behavior the rest of the pipeline can't deliver; they were false signals. Test suite: 229 passed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 17:58:43 -05:00 · 2026-05-03 17:58:43 -05:00 · 0390eb20b6
commit 0390eb20b6
parent ac7b85ff8d
2 changed files with 66 additions and 45 deletions
--- a/orchestrator/fleet.py
+++ b/orchestrator/fleet.py
@@ -244,20 +244,25 @@ def _run_slot(
    # Decide tier.
    # Tier-3 modules split into two classes by `requires_bridge`:
-    #   - bind/reverse-shell payloads under SLIRP need only loopback
+    #   - SLIRP-friendly bind shells like samba_usermap_script's
-    #     hostfwd (samba_usermap_script with bind_perl, etc.).
+    #     cmd/unix/bind_perl (handler connects in over hostfwd).
-    #   - modules with hardcoded callback ports or guest-driven
+    #   - Bridge-only modules (vsftpd's port-6200 backdoor, distccd,
-    #     callbacks (vsftpd's port-6200 backdoor, distccd, php_cgi,
+    #     php_cgi, unreal_ircd) where the handler must reach the
-    #     unreal_ircd) need a bridge so each guest gets its own IP.
+    #     guest at its own bridge IP.
-    # When the operator sets BRIDGE (= bridge configured + tap
+    # The bridge-only set is filtered out unconditionally because the
-    # available), every module is usable. Without BRIDGE we drop the
+    # rest of the pipeline currently passes target_ip=127.0.0.1 (SLIRP
-    # bridge-only ones — running them under SLIRP would either fail
+    # loopback) regardless of bridge mode, so bridge-only modules
-    # to land or collide on shared loopback ports across slots.
+    # land in target_ip mismatches that produce session_open_timeout
-    bridge_set = bool(os.environ.get("BRIDGE"))
+    # AND a dishonest infected_running label (PIPELINE.md §10). When
+    # target-IP discovery from the guest's bridge lease lands, this
+    # filter can be made conditional on `bridge_set` again. See the
+    # 2026-05-03 vsftpd_234_backdoor episode (commit 4ab5477) on
+    # k-gamingcom for the empirical evidence the conditional version
+    # produced poisoned labels.
    usable_modules: dict[str, ModuleConfig] = (
-        dict(cfg.modules) if bridge_set
+        {k: v for k, v in cfg.modules.items() if not v.requires_bridge}
-        else {k: v for k, v in cfg.modules.items() if not v.requires_bridge}
+        if cfg.modules else {}
-    ) if cfg.modules else {}
+    )
    tier3_ready = (
        not cfg.force_tier2
        and bool(usable_modules)
@@ -309,15 +314,13 @@ def _run_slot(
            target_ports += f",{extra_host_port}:{extra_host_port}"
            env["FLEET_PAYLOAD_LPORT"] = str(extra_host_port)
        env["TARGET_PORTS"] = target_ports
-        # When BRIDGE is unset, force SLIRP+hostfwd; when it IS set we
+        # Tier-3 always uses SLIRP+hostfwd. Strip BRIDGE so a host that
-        # keep it so requires_bridge modules (vsftpd backdoor on the
+        # has BRIDGE set for Tier-2 (pcap source 4) doesn't accidentally
-        # hardcoded port 6200, distccd, etc.) can reach the guest via
+        # propagate it into the Tier-3 launch_target.sh, which would try
-        # its own bridge IP. Refs Bug 1 in TIER3-BRINGUP.md (BRIDGE
+        # tap mode without the matching guest-IP discovery wired (see
-        # leaking from Tier-2 into Tier-3 broke things) — that fix was
+        # the usable_modules comment above for the matching reason this
-        # too aggressive; it stripped BRIDGE even when the module
+        # has to stay strict).
-        # legitimately needed it.
+        env.pop("BRIDGE", None)
-        if not bridge_set:
-            env.pop("BRIDGE", None)
        cmd = [
            py,
            str(cfg.repo_root / "tools" / "run_tier3_demo.py"),
--- a/tests/test_fleet.py
+++ b/tests/test_fleet.py
@@ -323,29 +323,19 @@ def test_fleet_skips_requires_bridge_modules_when_no_bridge(monkeypatch, tmp_pat
        f"selected callback modules without BRIDGE: {seen_modules & callback_modules}"
-def test_fleet_uses_all_modules_when_bridge_set(monkeypatch, tmp_path) -> None:
+def test_tier3_strips_bridge_env_even_when_set(monkeypatch, tmp_path) -> None:
-    """With BRIDGE set, the full catalog (including reverse/bind shell
+    """Tier-3 always uses SLIRP+hostfwd because the rest of the pipeline
-    payloads) is in rotation."""
+    passes target_ip=127.0.0.1 regardless of bridge mode (no guest-IP
-    from orchestrator import fleet
+    discovery wired). If BRIDGE leaks into launch_target.sh's env, the
-    cfg = _fleet_cfg_with_modules(tmp_path)
+    target VM goes into tap mode without the matching IP discovery and
-    monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
+    every exploit times out against 127.0.0.1 — producing dishonest
-    monkeypatch.setenv("BRIDGE", "br-malware")
+    infected_running labels (PIPELINE.md §10). Strip BRIDGE from the
-    _patch_subprocess(monkeypatch)
+    Tier-3 subprocess env even when the operator set it for Tier-2.
-    capacity = fleet.detect_capacity()
-    sample = cfg.manifest.samples[0]
+    Regression for: 2026-05-03 vsftpd_234_backdoor episode on
-    seen = set()
+    k-gamingcom (commit 4ab5477) — picker selected vsftpd because
-    for ep in range(40):
+    BRIDGE was set, episode timed out, schedule-clock wrote
-        res = fleet._run_slot(cfg, slot=0, sample=sample, episode_index=ep, capacity=capacity)
+    `infected_running` for an exploit that never landed."""
-        if res.tier == "tier3" and res.module_name:
-            seen.add(res.module_name)
-    assert seen == set(cfg.modules.keys()), \
-        f"only saw {seen}/{set(cfg.modules.keys())}"
-def test_fleet_propagates_bridge_env_to_runner(monkeypatch, tmp_path) -> None:
-    """When BRIDGE is set in the parent env, the per-slot subprocess
-    env must carry it through so launch_target.sh enters tap+bridge mode."""
    from orchestrator import fleet
    cfg = _fleet_cfg_with_modules(tmp_path)
    monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
@@ -354,7 +344,35 @@ def test_fleet_propagates_bridge_env_to_runner(monkeypatch, tmp_path) -> None:
    capacity = fleet.detect_capacity()
    sample = cfg.manifest.samples[0]
    fleet._run_slot(cfg, slot=0, sample=sample, episode_index=0, capacity=capacity)
-    assert _RecordingPopen.calls[-1]["env"]["BRIDGE"] == "br-malware"
+    assert "BRIDGE" not in _RecordingPopen.calls[-1]["env"]
+def test_tier3_drops_requires_bridge_modules_unconditionally(monkeypatch, tmp_path) -> None:
+    """Picker MUST drop requires_bridge modules even when BRIDGE is set,
+    because the rest of the pipeline can't actually use them yet (no
+    guest-IP discovery for bridge mode). Until that's wired, including
+    them produces session_open_timeout + dishonest labels.
+    Asserts the picker only ever returns the SLIRP-friendly subset
+    across many episodes regardless of BRIDGE state."""
+    from orchestrator import fleet
+    cfg = _fleet_cfg_with_modules(tmp_path)
+    monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
+    monkeypatch.setenv("BRIDGE", "br-malware")
+    _patch_subprocess(monkeypatch)
+    capacity = fleet.detect_capacity()
+    slirp_friendly = {k for k, v in cfg.modules.items() if not v.requires_bridge}
+    sample = cfg.manifest.samples[0]
+    seen = set()
+    for ep in range(40):
+        res = fleet._run_slot(cfg, slot=0, sample=sample,
+                              episode_index=ep, capacity=capacity)
+        if res.tier == "tier3" and res.module_name:
+            seen.add(res.module_name)
+    assert seen <= slirp_friendly, (
+        f"picker returned bridge-only modules {seen - slirp_friendly}; "
+        f"these can't reach the guest with target_ip=127.0.0.1"
+    )
 def test_fleet_assigns_unique_port_base_per_slot(monkeypatch, tmp_path) -> None: