Closes a real reproducibility gap. Three weeks of bug fixes have shipped (probe fix in 2707709, multi-signal classifier in 321ea63, mandatory tier-4 in 265f3ad, etc.); without a per-episode code_version, trainers can't tell which episodes came from buggy pre-fix code and have to scan every tarball to guess. Resolution priority (cached across episodes): 1. $INSTALL_ROOT/VERSION (production — install-lab-host.sh writes it at install time since /opt/cis490 is a flat copy with no .git); 2. git rev-parse HEAD from the repo root (dev clones); 3. {"commit": "unknown", "source": "unknown"} so the field is always present (filterable). Output shape, always present in meta.json: "code_version": { "commit": "<40-hex>" | "unknown", "branch": "<name>" | null, "dirty": bool | null, "source": "VERSION-file" | "git" | "unknown" }. install-lab-host.sh writes VERSION at install time with the source repo's git rev-parse HEAD + branch + clean-tree flag + install timestamp. Lab-host agents that pull main + re-run install-lab-host.sh get a fresh stamp automatically. 148/148 tests pass; test_episode_against_self_pid_produces_full_directory asserts the field's presence + valid `source` value.
148 lines
5 KiB
Python
148 lines
5 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from orchestrator.episode import EpisodeConfig, EpisodeRunner
|
|
|
|
|
|
def _read_jsonl(p: Path) -> list[dict]:
    """Parse a JSON Lines file into a list of decoded records.

    The file is read as UTF-8 rather than the platform's locale default,
    and lines that are empty or contain only whitespace are skipped instead
    of being handed to ``json.loads`` (which raises on them).

    Args:
        p: Path of the ``.jsonl`` file to read.

    Returns:
        One decoded JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    text = p.read_text(encoding="utf-8")
    return [json.loads(line) for line in text.splitlines() if line.strip()]
|
|
|
|
|
|
def test_episode_against_self_pid_produces_full_directory(tmp_path: Path) -> None:
    """Run a 0.5 s episode against this process and inspect every artifact.

    Checks that the episode directory holds all expected files, that
    meta.json carries the expected identity / schedule / result fields
    (including the ``code_version`` stamp), and that the labels, events
    and proc-telemetry streams contain the expected records.
    """
    own_pid = os.getpid()
    config = EpisodeConfig(
        target_pid=own_pid,
        duration_s=0.5,
        interval_ms=50,
        data_root=tmp_path,
    )
    outcome = EpisodeRunner(config).run()

    ep_dir = outcome.episode_dir
    assert ep_dir.exists()
    for artifact in (
        "meta.json",
        "events.jsonl",
        "labels.jsonl",
        "telemetry-proc.jsonl",
        "done.marker",
    ):
        assert (ep_dir / artifact).exists()

    # meta.json structure
    meta = json.loads((ep_dir / "meta.json").read_text())
    assert meta["episode_id"] == outcome.episode_id
    assert meta["schema_version"] == 1

    # code_version records which commit produced the episode, letting
    # trainers drop pre-fix data without opening every tarball.
    assert "code_version" in meta
    code_version = meta["code_version"]
    assert all(key in code_version for key in ("commit", "source"))
    # Any of the three sources is fine here: "git" when the tests run in
    # a git checkout, "VERSION-file" when they run against /opt/cis490/,
    # "unknown" on CI without git. Only the field's presence matters.
    assert code_version["source"] in {"git", "VERSION-file", "unknown"}

    for stamp in ("started_at_wall", "ended_at_wall"):
        assert meta[stamp] is not None
    assert meta["vm"]["target_pid"] == own_pid

    schedule = meta["schedule"]
    assert schedule["baseline_seconds"] == 0.5
    assert schedule["interval_ms"] == 50

    summary = meta["result"]
    assert summary["rows_proc"] == outcome.rows_proc
    assert "clean" in summary["phases_observed"]

    # labels.jsonl: at least one clean label at t=0.
    label_rows = _read_jsonl(ep_dir / "labels.jsonl")
    assert any(
        entry["phase"] == "clean" and entry["t_mono_ns"] == 0
        for entry in label_rows
    )

    # events.jsonl: snapshot_load + episode_end.
    seen_events = [entry["event"] for entry in _read_jsonl(ep_dir / "events.jsonl")]
    for expected in ("snapshot_load", "episode_end"):
        assert expected in seen_events

    # telemetry-proc.jsonl: roughly 10 ticks @ 50ms over 500ms.
    telemetry = _read_jsonl(ep_dir / "telemetry-proc.jsonl")
    assert len(telemetry) >= 5
    for sample_row in telemetry:
        assert sample_row["source"] == "host_proc"
        assert sample_row["available_in_deployment"] is False
        assert sample_row["rss_bytes"] > 0
|
|
|
|
|
|
def test_episode_id_can_be_overridden(tmp_path: Path) -> None:
    """An explicit ``episode_id`` is used for both the result and its directory."""
    forced_id = "01TEST"
    outcome = EpisodeRunner(
        EpisodeConfig(
            target_pid=os.getpid(),
            duration_s=0.1,
            interval_ms=50,
            data_root=tmp_path,
            episode_id=forced_id,
        )
    ).run()

    assert outcome.episode_id == forced_id
    # The episode directory lives under <data_root>/episodes/<episode_id>.
    assert outcome.episode_dir == tmp_path / "episodes" / forced_id
|
|
|
|
|
|
def test_meta_sample_records_full_sample_when_passed(tmp_path: Path) -> None:
    """EpisodeConfig.sample → meta.sample carries identity + kind so
    trainers can join episodes by family/sha256 without re-deriving
    from events."""
    # Imported inside the test, as in the original; only this test needs it.
    from samples.manifest import Sample

    digest = "abc" * 21 + "d"  # 64 hex characters
    s = Sample(
        name="xmrig-cryptominer",
        family="XMRig",
        category="cryptominer",
        profile="cpu-saturate",
        sha256=digest,
        source="MalwareBazaar",
    )
    cfg = EpisodeConfig(
        # Uses the module-level ``os`` import like the sibling tests do,
        # rather than a redundant function-local alias.
        target_pid=os.getpid(),
        duration_s=0.1,
        interval_ms=50,
        data_root=tmp_path,
        sample=s,
    )
    result = EpisodeRunner(cfg).run()

    meta = json.loads((result.episode_dir / "meta.json").read_text())
    assert meta["sample"] is not None
    assert meta["sample"]["name"] == "xmrig-cryptominer"
    assert meta["sample"]["family"] == "XMRig"
    assert meta["sample"]["category"] == "cryptominer"
    assert meta["sample"]["profile"] == "cpu-saturate"
    assert meta["sample"]["kind"] == "real"
    assert meta["sample"]["sha256"] == digest
|
|
|
|
|
|
def test_meta_sample_is_null_for_v1_path(tmp_path: Path) -> None:
    """No sample passed → the v1 fallback path. meta.sample stays
    null so trainers can detect (and filter out) info-less runs."""
    cfg = EpisodeConfig(
        # Uses the module-level ``os`` import like the sibling tests do,
        # rather than a redundant function-local alias.
        target_pid=os.getpid(),
        duration_s=0.1,
        interval_ms=50,
        data_root=tmp_path,
    )
    result = EpisodeRunner(cfg).run()
    meta = json.loads((result.episode_dir / "meta.json").read_text())
    assert meta["sample"] is None
|
|
|
|
|
|
def test_episode_writes_done_marker_last(tmp_path: Path) -> None:
    """done.marker should not appear until meta.json has ended_at_wall set.

    Only the state after the run is inspected: the marker exists and
    meta.json already has ``ended_at_wall`` filled in. The order in which
    the two files were written is not observed here.
    """
    outcome = EpisodeRunner(
        EpisodeConfig(
            target_pid=os.getpid(),
            duration_s=0.1,
            interval_ms=50,
            data_root=tmp_path,
        )
    ).run()

    ep_dir = outcome.episode_dir
    assert (ep_dir / "done.marker").exists()

    meta_text = (ep_dir / "meta.json").read_text()
    assert json.loads(meta_text)["ended_at_wall"] is not None
|