Phase labels are written ONLY when justifying events arrive. The
schedule clock is now a budget — an upper bound — never a label
source. This is the core honesty fix the §3 evidence demanded:
Before: every Tier-3 episode wrote `infected_running` from the
schedule clock regardless of whether session_open ever
fired. Per §10 every dishonest label is a poisoned
training example. 67/67 of the §3 probe episodes were
poisoned this way.
After: `infecting` writes ONLY when exploit_fire is observed in
events.jsonl. `infected_running` writes ONLY when
session_open is observed. Either timing out or seeing
session_open_timeout terminates the walker with a `failed`
label that the §4.6 acceptance gate will reject.
PHASE_JUSTIFYING_EVENTS in orchestrator/episode.py declares which
events justify which phases:
"clean": None # orchestrator-emitted
"armed": None # orchestrator-emitted
"infecting": ("exploit_fire",)
"infected_running": ("session_open",)
TERMINAL_FAILURE_EVENTS = {"session_open_timeout"} short-circuit any
event-driven wait into a `failed` label.
`dormant` is intentionally OFF the canonical schedule. §4.5 calls
for dormant to be event-driven (session_idle / session_active) too,
but the driver doesn't emit those yet. Per §1 default-to-removal we
ship without dormant rather than label it from the clock; when the
driver gains those emits, dormant re-enters the schedule with
proper justification.
EpisodeRunner now owns:
* `_event_log` — every emit_event appends here
* `_event_cv` — condition variable for waiters
* `_wait_for_event(names, since_t_mono_ns, timeout_s)` — returns
the first matching event in the log
with t_mono >= threshold; threshold
catches events that fired during
the previous on_phase callback.
When an event-driven phase's justifier already arrived (e.g.
exploit_fire emitted by driver._fire() inside on_phase("armed")),
the walker uses the EVENT's t_mono on the label — not the time the
walker noticed. The label means "this is when this thing actually
happened."
manifest.toml: dropped the dormant cycle from the canonical schedule.
Episode is shorter (~30s) but every label is event-justified.
14 new tests in tests/test_event_driven_labeller.py covering: justifier
mapping invariants, _wait_for_event semantics (already-arrived,
future, timeout, since-threshold, first-of-multiple-names), walker
behavior (orchestrator-emitted phases, event-driven phases, missing
event → failed, terminal-failure-event short-circuit, stop event,
event-t_mono on label, phase_transition events with justified_by).
286 tests passing.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
278 lines
10 KiB
Python
278 lines
10 KiB
Python
"""Tests for the §4.5 event-driven labeller in orchestrator/episode.py.
|
|
|
|
The walker labels each phase ONLY when a justifying event is observed.
|
|
Schedule clock is a budget, never a label source. Failures (timeout
|
|
or terminal-failure event) emit `failed` and break the walker."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from orchestrator.episode import (
|
|
PHASE_JUSTIFYING_EVENTS, EpisodeConfig, EpisodeRunner,
|
|
TERMINAL_FAILURE_EVENTS,
|
|
)
|
|
|
|
|
|
def _make_runner(tmp_path: Path, *, schedule, target_pid=None) -> EpisodeRunner:
|
|
"""Build a runner that won't actually walk collectors — we only
|
|
care about the labeller. EpisodeConfig.target_pid points at the
|
|
test process so _pid_alive returns True."""
|
|
cfg = EpisodeConfig(
|
|
target_pid=target_pid or os.getpid(),
|
|
duration_s=sum(d for _, d in schedule),
|
|
data_root=tmp_path,
|
|
phase_schedule=schedule,
|
|
interval_ms=100,
|
|
)
|
|
runner = EpisodeRunner(cfg)
|
|
runner._t_mono_origin_ns = time.monotonic_ns()
|
|
return runner
|
|
|
|
|
|
def _read_labels(runner: EpisodeRunner) -> list[dict]:
|
|
p = runner.episode_dir / "labels.jsonl"
|
|
if not p.exists():
|
|
return []
|
|
return [json.loads(l) for l in p.read_text().splitlines() if l.strip()]
|
|
|
|
|
|
# ---------------------------------------------------------------------
|
|
# Justifier mapping invariants
|
|
# ---------------------------------------------------------------------
|
|
|
|
|
|
def test_canonical_phase_justifiers_present() -> None:
|
|
"""The canonical event-driven phases (infecting, infected_running)
|
|
must have justifying events declared. clean / armed are
|
|
orchestrator-emitted (None)."""
|
|
assert PHASE_JUSTIFYING_EVENTS["clean"] is None
|
|
assert PHASE_JUSTIFYING_EVENTS["armed"] is None
|
|
assert "exploit_fire" in PHASE_JUSTIFYING_EVENTS["infecting"]
|
|
assert "session_open" in PHASE_JUSTIFYING_EVENTS["infected_running"]
|
|
|
|
|
|
def test_session_open_timeout_is_terminal() -> None:
|
|
assert "session_open_timeout" in TERMINAL_FAILURE_EVENTS
|
|
|
|
|
|
# ---------------------------------------------------------------------
|
|
# wait_for_event — picks up already-arrived + future events
|
|
# ---------------------------------------------------------------------
|
|
|
|
|
|
def test_wait_for_event_returns_already_arrived(tmp_path) -> None:
|
|
runner = _make_runner(tmp_path, schedule=[])
|
|
# Emit BEFORE waiting; the wait should return immediately.
|
|
runner.emit_event("test_event", marker=1)
|
|
ev = runner._wait_for_event("test_event", since_t_mono_ns=0, timeout_s=0.1)
|
|
assert ev is not None
|
|
assert ev["event"] == "test_event"
|
|
assert ev["marker"] == 1
|
|
|
|
|
|
def test_wait_for_event_picks_up_future(tmp_path) -> None:
|
|
runner = _make_runner(tmp_path, schedule=[])
|
|
fired = threading.Event()
|
|
|
|
def fire_after_delay():
|
|
time.sleep(0.05)
|
|
runner.emit_event("late_event", from_thread=True)
|
|
fired.set()
|
|
|
|
threading.Thread(target=fire_after_delay, daemon=True).start()
|
|
ev = runner._wait_for_event("late_event", since_t_mono_ns=0, timeout_s=2.0)
|
|
fired.wait(timeout=2.0)
|
|
assert ev is not None
|
|
assert ev["from_thread"] is True
|
|
|
|
|
|
def test_wait_for_event_times_out(tmp_path) -> None:
|
|
runner = _make_runner(tmp_path, schedule=[])
|
|
t0 = time.monotonic()
|
|
ev = runner._wait_for_event("never_fires", since_t_mono_ns=0, timeout_s=0.2)
|
|
elapsed = time.monotonic() - t0
|
|
assert ev is None
|
|
assert 0.15 < elapsed < 1.0 # roughly the timeout, not minutes
|
|
|
|
|
|
def test_wait_for_event_filters_by_since_threshold(tmp_path) -> None:
|
|
runner = _make_runner(tmp_path, schedule=[])
|
|
runner.emit_event("e", v=1)
|
|
# Emit puts t_mono_ns >= 0 in the row. since=10**18 (way in the
|
|
# future) means no event qualifies.
|
|
ev = runner._wait_for_event(
|
|
"e", since_t_mono_ns=10**18, timeout_s=0.1,
|
|
)
|
|
assert ev is None
|
|
|
|
|
|
def test_wait_for_event_accepts_first_of_multiple_names(tmp_path) -> None:
|
|
runner = _make_runner(tmp_path, schedule=[])
|
|
runner.emit_event("a")
|
|
runner.emit_event("b")
|
|
ev = runner._wait_for_event(("b", "a"), since_t_mono_ns=0, timeout_s=0.1)
|
|
assert ev is not None
|
|
# First chronologically among the matches; we emitted `a` first.
|
|
assert ev["event"] == "a"
|
|
|
|
|
|
# ---------------------------------------------------------------------
|
|
# Walker behavior
|
|
# ---------------------------------------------------------------------
|
|
|
|
|
|
def test_walker_emits_orchestrator_phases_immediately(tmp_path) -> None:
|
|
"""clean and armed are orchestrator-emitted; their labels write
|
|
on phase entry, not on event arrival."""
|
|
runner = _make_runner(tmp_path, schedule=[
|
|
("clean", 0.05),
|
|
("armed", 0.05),
|
|
])
|
|
observed = runner._walk_schedule()
|
|
assert observed == ["clean", "armed"]
|
|
labels = _read_labels(runner)
|
|
assert [l["phase"] for l in labels] == ["clean", "armed"]
|
|
assert all(l["reason"] == "orchestrator_emitted" for l in labels)
|
|
|
|
|
|
def test_walker_writes_infecting_only_on_exploit_fire(tmp_path) -> None:
|
|
"""infecting label must only appear when exploit_fire fires.
|
|
Without the event, walker emits failed."""
|
|
runner = _make_runner(tmp_path, schedule=[
|
|
("clean", 0.05),
|
|
("armed", 0.05),
|
|
("infecting", 0.1),
|
|
])
|
|
observed = runner._walk_schedule()
|
|
assert observed == ["clean", "armed", "failed"]
|
|
labels = _read_labels(runner)
|
|
phase_names = [l["phase"] for l in labels]
|
|
assert "infecting" not in phase_names
|
|
assert phase_names[-1] == "failed"
|
|
fail_label = labels[-1]
|
|
assert "timeout_no_exploit_fire" in fail_label["reason"]
|
|
|
|
|
|
def test_walker_writes_infecting_when_event_arrives(tmp_path) -> None:
|
|
"""When exploit_fire is emitted (during armed or infecting), the
|
|
infecting label is written using the EVENT's t_mono."""
|
|
schedule = [
|
|
("clean", 0.05),
|
|
("armed", 0.05),
|
|
("infecting", 0.5),
|
|
("infected_running", 0.5),
|
|
]
|
|
runner = _make_runner(tmp_path, schedule=schedule)
|
|
|
|
# Driver-side simulator: when armed phase begins, fire exploit;
|
|
# when infecting begins, "open the session".
|
|
def on_phase(p):
|
|
if p == "armed":
|
|
runner.emit_event("exploit_fire")
|
|
elif p == "infecting":
|
|
runner.emit_event("session_open", session_id=1)
|
|
runner.on_phase = on_phase
|
|
|
|
observed = runner._walk_schedule()
|
|
assert observed == ["clean", "armed", "infecting", "infected_running"]
|
|
labels = _read_labels(runner)
|
|
phase_names = [l["phase"] for l in labels]
|
|
assert phase_names == ["clean", "armed", "infecting", "infected_running"]
|
|
# Reasons reflect event justification on the event-driven phases.
|
|
assert labels[2]["reason"] == "event:exploit_fire"
|
|
assert labels[3]["reason"] == "event:session_open"
|
|
|
|
|
|
def test_walker_uses_event_t_mono_not_walker_t_mono(tmp_path) -> None:
|
|
"""When exploit_fire fires DURING armed phase (i.e. before the
|
|
walker enters infecting), the infecting label must carry the
|
|
EVENT's t_mono — not the time the walker noticed."""
|
|
schedule = [
|
|
("clean", 0.02),
|
|
("armed", 0.5), # long armed; event fires early in it
|
|
("infecting", 0.5),
|
|
]
|
|
runner = _make_runner(tmp_path, schedule=schedule)
|
|
fired_at = [None]
|
|
|
|
def on_phase(p):
|
|
if p == "armed":
|
|
fired_at[0] = time.monotonic_ns() - runner._t_mono_origin_ns
|
|
runner.emit_event("exploit_fire")
|
|
runner.on_phase = on_phase
|
|
|
|
runner._walk_schedule()
|
|
labels = _read_labels(runner)
|
|
infecting = next(l for l in labels if l["phase"] == "infecting")
|
|
# The infecting label's t_mono should be very close to fired_at,
|
|
# not to when the walker entered infecting (which is much later).
|
|
assert abs(infecting["t_mono_ns"] - fired_at[0]) < 5_000_000 # 5ms
|
|
|
|
|
|
def test_walker_terminal_failure_event_short_circuits(tmp_path) -> None:
|
|
"""If session_open_timeout arrives during the wait for
|
|
session_open, walker must emit `failed` (not infected_running)
|
|
and stop walking."""
|
|
schedule = [
|
|
("clean", 0.02),
|
|
("armed", 0.02),
|
|
("infecting", 0.5),
|
|
("infected_running", 0.5),
|
|
]
|
|
runner = _make_runner(tmp_path, schedule=schedule)
|
|
|
|
def on_phase(p):
|
|
if p == "armed":
|
|
runner.emit_event("exploit_fire")
|
|
elif p == "infected_running":
|
|
runner.emit_event("session_open_timeout", timeout_s=30)
|
|
runner.on_phase = on_phase
|
|
|
|
observed = runner._walk_schedule()
|
|
assert observed == ["clean", "armed", "infecting", "failed"]
|
|
labels = _read_labels(runner)
|
|
fail_label = labels[-1]
|
|
assert fail_label["phase"] == "failed"
|
|
assert "session_open_timeout" in fail_label["reason"]
|
|
|
|
|
|
def test_walker_stop_event_breaks_walk(tmp_path) -> None:
|
|
schedule = [("clean", 0.5), ("armed", 0.5), ("infecting", 0.5)]
|
|
runner = _make_runner(tmp_path, schedule=schedule)
|
|
runner._stop.set()
|
|
observed = runner._walk_schedule()
|
|
assert observed == []
|
|
|
|
|
|
def test_walker_writes_phase_transition_events(tmp_path) -> None:
|
|
"""Each phase transition (orchestrator-emitted or event-driven)
|
|
must also fire a phase_transition event into events.jsonl. The
|
|
event-driven version carries `justified_by` so the §4.6 gate can
|
|
cross-check."""
|
|
schedule = [("clean", 0.02), ("armed", 0.02), ("infecting", 0.5)]
|
|
runner = _make_runner(tmp_path, schedule=schedule)
|
|
|
|
def on_phase(p):
|
|
if p == "armed":
|
|
runner.emit_event("exploit_fire")
|
|
runner.on_phase = on_phase
|
|
|
|
runner._walk_schedule()
|
|
events_path = runner.episode_dir / "events.jsonl"
|
|
events = [json.loads(l) for l in events_path.read_text().splitlines() if l.strip()]
|
|
transitions = [e for e in events if e["event"] == "phase_transition"]
|
|
transition_phases = [e["to"] for e in transitions]
|
|
assert transition_phases == ["clean", "armed", "infecting"]
|
|
# Event-driven phase carries justified_by; orchestrator-emitted
|
|
# phases don't.
|
|
infecting_t = next(e for e in transitions if e["to"] == "infecting")
|
|
assert infecting_t["justified_by"] == "exploit_fire"
|
|
armed_t = next(e for e in transitions if e["to"] == "armed")
|
|
assert "justified_by" not in armed_t
|