fix: revert speculative fleet picker change — was producing dishonest labels
Empirical evidence from k-gamingcom (commit 4ab5477, 2026-05-03 22:20Z
vsftpd_234_backdoor episode): the picker selected vsftpd because BRIDGE
was set on that host. The exploit fires against target_ip=127.0.0.1
(SLIRP loopback) but vsftpd's hardcoded port-6200 backdoor is reachable
only at the guest's bridge IP. Result: session_open_timeout, AND a
schedule-clock-driven `infected_running` label was still written for
the failed exploit — exactly the PIPELINE.md §10 poisoned-training-example pattern.
Until guest-IP discovery for bridge mode is wired (a separate piece of
infrastructure), bridge-only modules can't actually reach their target
even when the operator sets BRIDGE for Tier-2's pcap source. Revert
the picker to its prior conservative form: drop requires_bridge modules
unconditionally, regardless of BRIDGE state. The same applies to the BRIDGE env
strip in the Tier-3 launch path — stripping it unconditionally was correct.
Replaces the two aspirational tests
(test_fleet_uses_all_modules_when_bridge_set,
test_fleet_propagates_bridge_env_to_runner) with their honest negatives
(test_tier3_drops_requires_bridge_modules_unconditionally,
test_tier3_strips_bridge_env_even_when_set). The previous tests asserted
behavior the rest of the pipeline can't deliver; they were false signals.
Test suite: 229 passed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ac7b85ff8d
commit
0390eb20b6
2 changed files with 66 additions and 45 deletions
|
|
@@ -244,20 +244,25 @@ def _run_slot(
|
||||||
|
|
||||||
# Decide tier.
|
# Decide tier.
|
||||||
# Tier-3 modules split into two classes by `requires_bridge`:
|
# Tier-3 modules split into two classes by `requires_bridge`:
|
||||||
# - bind/reverse-shell payloads under SLIRP need only loopback
|
# - SLIRP-friendly bind shells like samba_usermap_script's
|
||||||
# hostfwd (samba_usermap_script with bind_perl, etc.).
|
# cmd/unix/bind_perl (handler connects in over hostfwd).
|
||||||
# - modules with hardcoded callback ports or guest-driven
|
# - Bridge-only modules (vsftpd's port-6200 backdoor, distccd,
|
||||||
# callbacks (vsftpd's port-6200 backdoor, distccd, php_cgi,
|
# php_cgi, unreal_ircd) where the handler must reach the
|
||||||
# unreal_ircd) need a bridge so each guest gets its own IP.
|
# guest at its own bridge IP.
|
||||||
# When the operator sets BRIDGE (= bridge configured + tap
|
# The bridge-only set is filtered out unconditionally because the
|
||||||
# available), every module is usable. Without BRIDGE we drop the
|
# rest of the pipeline currently passes target_ip=127.0.0.1 (SLIRP
|
||||||
# bridge-only ones — running them under SLIRP would either fail
|
# loopback) regardless of bridge mode, so bridge-only modules
|
||||||
# to land or collide on shared loopback ports across slots.
|
# land in target_ip mismatches that produce session_open_timeout
|
||||||
bridge_set = bool(os.environ.get("BRIDGE"))
|
# AND a dishonest infected_running label (PIPELINE.md §10). When
|
||||||
|
# target-IP discovery from the guest's bridge lease lands, this
|
||||||
|
# filter can be made conditional on `bridge_set` again. See the
|
||||||
|
# 2026-05-03 vsftpd_234_backdoor episode (commit 4ab5477) on
|
||||||
|
# k-gamingcom for the empirical evidence the conditional version
|
||||||
|
# produced poisoned labels.
|
||||||
usable_modules: dict[str, ModuleConfig] = (
|
usable_modules: dict[str, ModuleConfig] = (
|
||||||
dict(cfg.modules) if bridge_set
|
{k: v for k, v in cfg.modules.items() if not v.requires_bridge}
|
||||||
else {k: v for k, v in cfg.modules.items() if not v.requires_bridge}
|
if cfg.modules else {}
|
||||||
) if cfg.modules else {}
|
)
|
||||||
tier3_ready = (
|
tier3_ready = (
|
||||||
not cfg.force_tier2
|
not cfg.force_tier2
|
||||||
and bool(usable_modules)
|
and bool(usable_modules)
|
||||||
|
|
@@ -309,15 +314,13 @@ def _run_slot(
|
||||||
target_ports += f",{extra_host_port}:{extra_host_port}"
|
target_ports += f",{extra_host_port}:{extra_host_port}"
|
||||||
env["FLEET_PAYLOAD_LPORT"] = str(extra_host_port)
|
env["FLEET_PAYLOAD_LPORT"] = str(extra_host_port)
|
||||||
env["TARGET_PORTS"] = target_ports
|
env["TARGET_PORTS"] = target_ports
|
||||||
# When BRIDGE is unset, force SLIRP+hostfwd; when it IS set we
|
# Tier-3 always uses SLIRP+hostfwd. Strip BRIDGE so a host that
|
||||||
# keep it so requires_bridge modules (vsftpd backdoor on the
|
# has BRIDGE set for Tier-2 (pcap source 4) doesn't accidentally
|
||||||
# hardcoded port 6200, distccd, etc.) can reach the guest via
|
# propagate it into the Tier-3 launch_target.sh, which would try
|
||||||
# its own bridge IP. Refs Bug 1 in TIER3-BRINGUP.md (BRIDGE
|
# tap mode without the matching guest-IP discovery wired (see
|
||||||
# leaking from Tier-2 into Tier-3 broke things) — that fix was
|
# the usable_modules comment above for the matching reason this
|
||||||
# too aggressive; it stripped BRIDGE even when the module
|
# has to stay strict).
|
||||||
# legitimately needed it.
|
env.pop("BRIDGE", None)
|
||||||
if not bridge_set:
|
|
||||||
env.pop("BRIDGE", None)
|
|
||||||
cmd = [
|
cmd = [
|
||||||
py,
|
py,
|
||||||
str(cfg.repo_root / "tools" / "run_tier3_demo.py"),
|
str(cfg.repo_root / "tools" / "run_tier3_demo.py"),
|
||||||
|
|
|
||||||
|
|
@@ -323,29 +323,19 @@ def test_fleet_skips_requires_bridge_modules_when_no_bridge(monkeypatch, tmp_pat
|
||||||
f"selected callback modules without BRIDGE: {seen_modules & callback_modules}"
|
f"selected callback modules without BRIDGE: {seen_modules & callback_modules}"
|
||||||
|
|
||||||
|
|
||||||
def test_fleet_uses_all_modules_when_bridge_set(monkeypatch, tmp_path) -> None:
|
def test_tier3_strips_bridge_env_even_when_set(monkeypatch, tmp_path) -> None:
|
||||||
"""With BRIDGE set, the full catalog (including reverse/bind shell
|
"""Tier-3 always uses SLIRP+hostfwd because the rest of the pipeline
|
||||||
payloads) is in rotation."""
|
passes target_ip=127.0.0.1 regardless of bridge mode (no guest-IP
|
||||||
from orchestrator import fleet
|
discovery wired). If BRIDGE leaks into launch_target.sh's env, the
|
||||||
cfg = _fleet_cfg_with_modules(tmp_path)
|
target VM goes into tap mode without the matching IP discovery and
|
||||||
monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
|
every exploit times out against 127.0.0.1 — producing dishonest
|
||||||
monkeypatch.setenv("BRIDGE", "br-malware")
|
infected_running labels (PIPELINE.md §10). Strip BRIDGE from the
|
||||||
_patch_subprocess(monkeypatch)
|
Tier-3 subprocess env even when the operator set it for Tier-2.
|
||||||
capacity = fleet.detect_capacity()
|
|
||||||
|
|
||||||
sample = cfg.manifest.samples[0]
|
Regression for: 2026-05-03 vsftpd_234_backdoor episode on
|
||||||
seen = set()
|
k-gamingcom (commit 4ab5477) — picker selected vsftpd because
|
||||||
for ep in range(40):
|
BRIDGE was set, episode timed out, schedule-clock wrote
|
||||||
res = fleet._run_slot(cfg, slot=0, sample=sample, episode_index=ep, capacity=capacity)
|
`infected_running` for an exploit that never landed."""
|
||||||
if res.tier == "tier3" and res.module_name:
|
|
||||||
seen.add(res.module_name)
|
|
||||||
assert seen == set(cfg.modules.keys()), \
|
|
||||||
f"only saw {seen}/{set(cfg.modules.keys())}"
|
|
||||||
|
|
||||||
|
|
||||||
def test_fleet_propagates_bridge_env_to_runner(monkeypatch, tmp_path) -> None:
|
|
||||||
"""When BRIDGE is set in the parent env, the per-slot subprocess
|
|
||||||
env must carry it through so launch_target.sh enters tap+bridge mode."""
|
|
||||||
from orchestrator import fleet
|
from orchestrator import fleet
|
||||||
cfg = _fleet_cfg_with_modules(tmp_path)
|
cfg = _fleet_cfg_with_modules(tmp_path)
|
||||||
monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
|
monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
|
||||||
|
|
@@ -354,7 +344,35 @@ def test_fleet_propagates_bridge_env_to_runner(monkeypatch, tmp_path) -> None:
|
||||||
capacity = fleet.detect_capacity()
|
capacity = fleet.detect_capacity()
|
||||||
sample = cfg.manifest.samples[0]
|
sample = cfg.manifest.samples[0]
|
||||||
fleet._run_slot(cfg, slot=0, sample=sample, episode_index=0, capacity=capacity)
|
fleet._run_slot(cfg, slot=0, sample=sample, episode_index=0, capacity=capacity)
|
||||||
assert _RecordingPopen.calls[-1]["env"]["BRIDGE"] == "br-malware"
|
assert "BRIDGE" not in _RecordingPopen.calls[-1]["env"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_tier3_drops_requires_bridge_modules_unconditionally(monkeypatch, tmp_path) -> None:
|
||||||
|
"""Picker MUST drop requires_bridge modules even when BRIDGE is set,
|
||||||
|
because the rest of the pipeline can't actually use them yet (no
|
||||||
|
guest-IP discovery for bridge mode). Until that's wired, including
|
||||||
|
them produces session_open_timeout + dishonest labels.
|
||||||
|
|
||||||
|
Asserts the picker only ever returns the SLIRP-friendly subset
|
||||||
|
across many episodes regardless of BRIDGE state."""
|
||||||
|
from orchestrator import fleet
|
||||||
|
cfg = _fleet_cfg_with_modules(tmp_path)
|
||||||
|
monkeypatch.setattr(fleet, "_msfrpcd_available", lambda *a, **kw: True)
|
||||||
|
monkeypatch.setenv("BRIDGE", "br-malware")
|
||||||
|
_patch_subprocess(monkeypatch)
|
||||||
|
capacity = fleet.detect_capacity()
|
||||||
|
slirp_friendly = {k for k, v in cfg.modules.items() if not v.requires_bridge}
|
||||||
|
sample = cfg.manifest.samples[0]
|
||||||
|
seen = set()
|
||||||
|
for ep in range(40):
|
||||||
|
res = fleet._run_slot(cfg, slot=0, sample=sample,
|
||||||
|
episode_index=ep, capacity=capacity)
|
||||||
|
if res.tier == "tier3" and res.module_name:
|
||||||
|
seen.add(res.module_name)
|
||||||
|
assert seen <= slirp_friendly, (
|
||||||
|
f"picker returned bridge-only modules {seen - slirp_friendly}; "
|
||||||
|
f"these can't reach the guest with target_ip=127.0.0.1"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_fleet_assigns_unique_port_base_per_slot(monkeypatch, tmp_path) -> None:
|
def test_fleet_assigns_unique_port_base_per_slot(monkeypatch, tmp_path) -> None:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue