def tier3_schedule_from(schedule: "tuple[Phase, ...]") -> list[tuple[str, float]]:
    """Render the canonical schedule as legacy ``[(name, seconds)]`` pairs.

    This is the format ``EpisodeConfig.phase_schedule`` expects.

    This IS the Tier-3 schedule: clean → armed → infecting →
    infected_running → ... per the canonical manifest. Phase labels
    are event-driven (PIPELINE.md §4.5) so durations are budgets, not
    label sources.

    Args:
        schedule: Canonical phases, in order. Each item must expose
            ``name`` and ``seconds`` attributes.

    Returns:
        One ``(name, seconds)`` pair per phase, in the order given, with
        ``seconds`` coerced to ``float``.
    """
    # Coerce explicitly: the declared element type is float, and a
    # duration may arrive as an int (e.g. written without a decimal
    # point in the manifest — NOTE(review): confirm whether the loader
    # already normalises this). float() is a no-op for float inputs.
    return [(phase.name, float(phase.seconds)) for phase in schedule]


def tier2_schedule_from(schedule: "tuple[Phase, ...]") -> list[tuple[str, float]]:
    """Collapse the canonical schedule into a single ``clean`` phase.

    Tier-2 episodes have no exploit and no driver firing modules.
    Walking the Tier-3 phase set on a Tier-2 episode produces dishonest
    `infected_running` labels (PIPELINE.md §3 evidence — the original
    sin) under clock-driven labelling, OR `failed` labels under
    event-driven labelling (still useless for training).

    Honest fix: Tier-2 episodes ride a single `clean` phase for the
    same total wall-clock as the Tier-3 walk so episode lengths are
    comparable across tiers (no length-bias in the dataset). Every
    telemetry row on a Tier-2 episode is tagged `clean` because
    nothing infected anything.

    Args:
        schedule: Canonical phases. Each item must expose a ``seconds``
            attribute; phase names are ignored.

    Returns:
        A one-element list ``[("clean", total_seconds)]`` where
        ``total_seconds`` is the float sum of every phase's budget
        (``0.0`` for an empty schedule).
    """
    total_seconds = sum(phase.seconds for phase in schedule)
    # float() keeps the element type stable even when every budget is an int.
    return [("clean", float(total_seconds))]
def _wait_for_socket(path: Path, timeout_s: float) -> None: @@ -214,12 +212,13 @@ def main() -> int: controller.setup() agent_sock = run_dir / "agent.sock" + schedule = tier2_schedule_from(experiment.schedule) cfg = EpisodeConfig( target_pid=qemu_pid, - duration_s=sum(d for _, d in DEFAULT_SCHEDULE), + duration_s=sum(d for _, d in schedule), interval_ms=args.interval_ms, data_root=Path(args.data_root), - phase_schedule=DEFAULT_SCHEDULE, + phase_schedule=schedule, image_name="alpine-3.21-cloudinit", snapshot_name="baseline-v1", qmp_socket=qmp_sock if qmp_sock.exists() else None, diff --git a/tools/run_tier3_demo.py b/tools/run_tier3_demo.py index 4a90af9..2e76078 100644 --- a/tools/run_tier3_demo.py +++ b/tools/run_tier3_demo.py @@ -38,23 +38,18 @@ from exploits.driver import DriverConfig, MSFExploitDriver # noqa: E402 from exploits.modules import load_module_config # noqa: E402 from exploits.msfrpc import MSFRpcClient, MSFRpcConfig # noqa: E402 from orchestrator.episode import EpisodeConfig, EpisodeRunner # noqa: E402 -from orchestrator.manifest import ManifestError, load_canonical # noqa: E402 +from orchestrator.manifest import ( # noqa: E402 + ManifestError, load_canonical, tier3_schedule_from, +) from samples.manifest import SampleManifest # noqa: E402 -# Same envelope shape as Tier 2 so plots are comparable. Slightly more -# armed/infecting time because real exploit fire + session establishment -# takes hundreds of ms to a few seconds. -DEFAULT_SCHEDULE = [ - ("clean", 10.0), - ("armed", 3.0), - ("infecting", 5.0), - ("infected_running", 25.0), - ("dormant", 15.0), - ("infected_running", 20.0), - ("dormant", 5.0), - ("clean", 5.0), -] +# Tier-3 schedule comes from the canonical manifest at episode-launch +# time. Phase durations are budgets for the §4.5 event-driven labeller +# (clean/armed orchestrator-emitted; infecting/infected_running gated +# on exploit_fire / session_open events). 
Per-call lookup so a manifest +# amendment takes effect on the next episode without a service +# restart. def _wait_for_path(path: Path, timeout_s: float) -> None: @@ -304,12 +299,13 @@ def main() -> int: # configured but emits zero rows is exactly the silent-downgrade # pattern §1 forbids. agent_sock = run_dir / "agent.sock" + schedule = tier3_schedule_from(experiment.schedule) cfg = EpisodeConfig( target_pid=qemu_pid, - duration_s=sum(d for _, d in DEFAULT_SCHEDULE), + duration_s=sum(d for _, d in schedule), interval_ms=args.interval_ms, data_root=Path(args.data_root), - phase_schedule=DEFAULT_SCHEDULE, + phase_schedule=schedule, image_name=module.name + "-target", snapshot_name="baseline-v1", qmp_socket=qmp_sock if qmp_sock.exists() else None,