CIS490/tests/test_episode.py
max 5c0bc9af8e meta.json: stamp code_version (commit, branch, dirty) per episode
Closes a real reproducibility gap. Three weeks of bug fixes have
shipped (probe fix in 2707709, multi-signal classifier in 321ea63,
mandatory tier-4 in 265f3ad, etc.); without a per-episode
code_version, trainers can't tell which episodes came from buggy
pre-fix code and have to scan every tarball to guess.

Resolution priority (cached across episodes):
  1. $INSTALL_ROOT/VERSION (production — install-lab-host.sh writes
     it at install time since /opt/cis490 is a flat copy with no .git)
  2. git rev-parse HEAD from the repo root (dev clones)
  3. {"commit": "unknown", "source": "unknown"} so the field is always
     present (filterable)

Output shape, always present in meta.json:
  "code_version": {
    "commit": "<40-hex>" | "unknown",
    "branch": "<name>" | null,
    "dirty":  bool | null,
    "source": "VERSION-file" | "git" | "unknown"
  }

install-lab-host.sh writes VERSION at install time with the source
repo's git rev-parse HEAD + branch + clean-tree flag + install
timestamp. Lab-host agents that pull main + re-run install-lab-host.sh
get a fresh stamp automatically.

148/148 tests pass; test_episode_against_self_pid_produces_full_directory
asserts the field's presence + valid `source` value.
2026-05-01 01:29:01 -05:00

148 lines
5 KiB
Python

from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from orchestrator.episode import EpisodeConfig, EpisodeRunner
def _read_jsonl(p: Path) -> list[dict]:
    """Parse a JSON Lines file into a list of decoded records.

    Args:
        p: Path of the ``.jsonl`` file to read.

    Returns:
        One decoded object per non-blank line, in file order. An empty
        file yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of the locale default, and skip
    # blank / whitespace-only lines so a stray empty line does not abort
    # the whole read with a JSONDecodeError.
    text = p.read_text(encoding="utf-8")
    return [json.loads(line) for line in text.splitlines() if line.strip()]
def test_episode_against_self_pid_produces_full_directory(tmp_path: Path) -> None:
    """Run a 0.5 s episode against this process and inspect every artifact.

    Checks that the episode directory contains all expected files, that
    meta.json carries the identity, code_version, timing, vm, schedule and
    result fields, and that the labels, events and proc-telemetry streams
    hold the expected records.
    """
    own_pid = os.getpid()
    config = EpisodeConfig(
        target_pid=own_pid,
        duration_s=0.5,
        interval_ms=50,
        data_root=tmp_path,
    )
    outcome = EpisodeRunner(config).run()
    episode_dir = outcome.episode_dir

    # Every artifact of a finished episode must be on disk.
    assert episode_dir.exists()
    for artifact in (
        "meta.json",
        "events.jsonl",
        "labels.jsonl",
        "telemetry-proc.jsonl",
        "done.marker",
    ):
        assert (episode_dir / artifact).exists()

    # meta.json structure
    meta = json.loads((episode_dir / "meta.json").read_text())
    assert meta["episode_id"] == outcome.episode_id
    assert meta["schema_version"] == 1

    # code_version records which commit produced the episode, letting
    # trainers drop pre-fix data without opening every tarball.
    assert "code_version" in meta
    code_version = meta["code_version"]
    assert "commit" in code_version
    assert "source" in code_version
    # "git" when the tests run from a git checkout, "VERSION-file" when
    # run against /opt/cis490/, "unknown" on CI without git. Any of the
    # three is fine; what matters is that the field is there.
    assert code_version["source"] in {"git", "VERSION-file", "unknown"}

    assert meta["started_at_wall"] is not None
    assert meta["ended_at_wall"] is not None
    assert meta["vm"]["target_pid"] == own_pid
    schedule = meta["schedule"]
    assert schedule["baseline_seconds"] == 0.5
    assert schedule["interval_ms"] == 50
    run_summary = meta["result"]
    assert run_summary["rows_proc"] == outcome.rows_proc
    assert "clean" in run_summary["phases_observed"]

    # labels.jsonl: at least one clean label at t=0.
    label_rows = _read_jsonl(episode_dir / "labels.jsonl")
    assert any(
        entry["phase"] == "clean" and entry["t_mono_ns"] == 0
        for entry in label_rows
    )

    # events.jsonl: snapshot_load + episode_end.
    seen_events = [
        entry["event"] for entry in _read_jsonl(episode_dir / "events.jsonl")
    ]
    assert "snapshot_load" in seen_events
    assert "episode_end" in seen_events

    # telemetry-proc.jsonl: roughly 10 ticks @ 50ms over 500ms.
    telemetry_rows = _read_jsonl(episode_dir / "telemetry-proc.jsonl")
    assert len(telemetry_rows) >= 5
    for tick in telemetry_rows:
        assert tick["source"] == "host_proc"
        assert tick["available_in_deployment"] is False
        assert tick["rss_bytes"] > 0
def test_episode_id_can_be_overridden(tmp_path: Path) -> None:
    """An explicit ``episode_id`` names both the result and its directory."""
    forced_id = "01TEST"
    config = EpisodeConfig(
        target_pid=os.getpid(),
        duration_s=0.1,
        interval_ms=50,
        data_root=tmp_path,
        episode_id=forced_id,
    )
    outcome = EpisodeRunner(config).run()

    assert outcome.episode_id == forced_id
    # The episode directory is <data_root>/episodes/<episode_id>.
    assert outcome.episode_dir == tmp_path / "episodes" / forced_id
def test_meta_sample_records_full_sample_when_passed(tmp_path: Path) -> None:
    """EpisodeConfig.sample → meta.sample carries identity + kind so
    trainers can join episodes by family/sha256 without re-deriving
    from events. With no Sample, meta.sample stays null."""
    from samples.manifest import Sample

    # One definition of the digest, shared by the fixture and the assertion,
    # so the two cannot drift apart.
    sha256 = "abc" * 21 + "d"  # 64 hex
    s = Sample(
        name="xmrig-cryptominer",
        family="XMRig",
        category="cryptominer",
        profile="cpu-saturate",
        sha256=sha256,
        source="MalwareBazaar",
    )
    # Uses the module-level ``os`` import, like the sibling tests.
    cfg = EpisodeConfig(
        target_pid=os.getpid(),
        duration_s=0.1,
        interval_ms=50,
        data_root=tmp_path,
        sample=s,
    )
    result = EpisodeRunner(cfg).run()
    meta = json.loads((result.episode_dir / "meta.json").read_text())

    assert meta["sample"] is not None
    assert meta["sample"]["name"] == "xmrig-cryptominer"
    assert meta["sample"]["family"] == "XMRig"
    assert meta["sample"]["category"] == "cryptominer"
    assert meta["sample"]["profile"] == "cpu-saturate"
    # "kind" is not passed to Sample(...) above; the test expects the
    # recorded value to be "real" for this sample.
    assert meta["sample"]["kind"] == "real"
    assert meta["sample"]["sha256"] == sha256
def test_meta_sample_is_null_for_v1_path(tmp_path: Path) -> None:
    """No sample passed → the v1 fallback path. meta.sample stays
    null so trainers can detect (and filter out) info-less runs."""
    # Uses the module-level ``os`` import, like the sibling tests.
    cfg = EpisodeConfig(
        target_pid=os.getpid(),
        duration_s=0.1,
        interval_ms=50,
        data_root=tmp_path,
    )
    result = EpisodeRunner(cfg).run()
    meta = json.loads((result.episode_dir / "meta.json").read_text())
    assert meta["sample"] is None
def test_episode_writes_done_marker_last(tmp_path: Path) -> None:
    """A finished episode has done.marker and a populated ended_at_wall.

    Only the end state after ``run()`` returns is checked here: the marker
    file exists and meta.json already has ``ended_at_wall`` set. The
    relative write order of the two files is not observed by this test.
    """
    config = EpisodeConfig(
        target_pid=os.getpid(),
        duration_s=0.1,
        interval_ms=50,
        data_root=tmp_path,
    )
    episode_dir = EpisodeRunner(config).run().episode_dir

    marker_path = episode_dir / "done.marker"
    assert marker_path.exists()

    meta_text = (episode_dir / "meta.json").read_text()
    meta = json.loads(meta_text)
    assert meta["ended_at_wall"] is not None