Add v0 orchestrator + first oracle collector (host /proc)

End-to-end: ``python -m orchestrator --target-pid <pid> --duration N`` now
writes a complete episode directory matching docs/data-model.md, with phase
labels, events, and a 10 Hz host /proc telemetry stream. No VM yet — pid is
arbitrary so we can validate the loop against e.g. ``sleep 5`` while the lab
side comes up.

collectors/proc_qemu.py — parses /proc/<pid>/{stat,io,status} (handles parens
in comm), single-shot collect_once(), and a stop-event-driven run_loop()
that ticks at a fixed cadence and exits when the pid disappears. Tagged
``available_in_deployment: false`` per the threat-model doc.

orchestrator/episode.py — EpisodeRunner: creates data/episodes/<ulid>/,
atomic meta.json, events.jsonl + labels.jsonl writers, drives the collector
in a thread for duration_s, writes done.marker last so the shipper never
sees a half-finished episode.

orchestrator/ulid.py — tiny 26-char Crockford-base32 ULID generator.
Time-sortable, no third-party dep.

orchestrator/__main__.py — CLI entry point.

Tests (15 new, 28 total green):
- proc_qemu: real-ish stat with parens-in-comm, missing /proc/<pid>/io,
  missing pid, run_loop cadence, run_loop terminates when pid disappears.
- episode: full directory shape against os.getpid(), id override,
  done.marker written after meta.json finalize.
- ulid: length+alphabet, 2000-burst uniqueness, time-sortability.

Smoke-tested against ``sleep 10``: 16 rows over 1.5s at 100ms cadence,
monotonic clock, RSS stable at ~3.5 MiB as expected for an idle sleep.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Maximus Gorog 2026-04-28 23:40:25 -06:00
parent 83e111961d
commit 064387b7a0
9 changed files with 780 additions and 0 deletions

0
collectors/__init__.py Normal file
View file

189
collectors/proc_qemu.py Normal file
View file

@ -0,0 +1,189 @@
"""Source 1 (oracle): host /proc/<pid> sampler.
Polls /proc/<pid>/{stat,io,status} at a fixed interval and emits one JSONL
row per tick into the episode telemetry file.
This source is **oracle-only** it does not exist on a deployed device, so
every row is tagged ``available_in_deployment: false``. See
``docs/threat-model.md``.
"""
from __future__ import annotations
import json
import logging
import os
import threading
import time
from dataclasses import dataclass
from pathlib import Path
log = logging.getLogger("cis490.collectors.proc_qemu")
SOURCE = "host_proc"
AVAILABLE_IN_DEPLOYMENT = False
PAGE_SIZE = os.sysconf("SC_PAGESIZE")
@dataclass(frozen=True)
class ProcStat:
minflt: int
majflt: int
utime: int # clock ticks (jiffies) in user mode
stime: int # clock ticks (jiffies) in kernel mode
vsize: int # bytes
rss_pages: int # in pages — multiply by PAGE_SIZE
@dataclass(frozen=True)
class ProcIO:
read_bytes: int
write_bytes: int
@dataclass(frozen=True)
class ProcStatus:
voluntary_ctxsw: int
involuntary_ctxsw: int
def parse_proc_stat(data: str) -> ProcStat:
"""Parse the contents of /proc/<pid>/stat.
The ``comm`` field (field 2) can contain spaces and parens, so we anchor
the split on the rightmost ')'. After that, fields are positional per
``man 5 proc``.
"""
rparen = data.rindex(")")
# Skip ") " after the comm to land on the state field.
fields = data[rparen + 2 :].split()
# Index in `fields`: man 5 proc field number (1-indexed)
# 0 state 3
# 1 ppid 4
# 2 pgrp 5
# 3 session 6
# 4 tty_nr 7
# 5 tpgid 8
# 6 flags 9
# 7 minflt 10
# 8 cminflt 11
# 9 majflt 12
# 10 cmajflt 13
# 11 utime 14
# 12 stime 15
# ...
# 20 vsize 23
# 21 rss (pages) 24
return ProcStat(
minflt=int(fields[7]),
majflt=int(fields[9]),
utime=int(fields[11]),
stime=int(fields[12]),
vsize=int(fields[20]),
rss_pages=int(fields[21]),
)
def parse_proc_io(data: str) -> ProcIO:
"""Parse /proc/<pid>/io. Requires same uid (or CAP_SYS_PTRACE) to read."""
out = {}
for line in data.splitlines():
k, _, v = line.partition(":")
out[k.strip()] = int(v.strip())
return ProcIO(read_bytes=out["read_bytes"], write_bytes=out["write_bytes"])
def parse_proc_status(data: str) -> ProcStatus:
"""Parse /proc/<pid>/status — only the two ctxsw fields we care about."""
vol = nvol = 0
for line in data.splitlines():
if line.startswith("voluntary_ctxt_switches:"):
vol = int(line.split(":", 1)[1].strip())
elif line.startswith("nonvoluntary_ctxt_switches:"):
nvol = int(line.split(":", 1)[1].strip())
return ProcStatus(voluntary_ctxsw=vol, involuntary_ctxsw=nvol)
def _read_text(path: str) -> str | None:
try:
with open(path, "rb") as f:
return f.read().decode("ascii", errors="replace")
except (FileNotFoundError, ProcessLookupError, PermissionError):
return None
def collect_once(
pid: int,
t_mono_origin_ns: int,
*,
proc_root: str = "/proc",
) -> dict | None:
"""One sample. Returns None if the target pid is gone.
``proc_root`` is overridable for tests against a synthetic /proc tree.
"""
stat_text = _read_text(f"{proc_root}/{pid}/stat")
if stat_text is None:
return None
stat = parse_proc_stat(stat_text)
io_text = _read_text(f"{proc_root}/{pid}/io")
io = parse_proc_io(io_text) if io_text is not None else None
status_text = _read_text(f"{proc_root}/{pid}/status")
status = parse_proc_status(status_text) if status_text is not None else None
return {
"t_mono_ns": time.monotonic_ns() - t_mono_origin_ns,
"t_wall_ns": time.time_ns(),
"source": SOURCE,
"available_in_deployment": AVAILABLE_IN_DEPLOYMENT,
"cpu_user_jiffies": stat.utime,
"cpu_sys_jiffies": stat.stime,
"rss_bytes": stat.rss_pages * PAGE_SIZE,
"vsize_bytes": stat.vsize,
"io_read_bytes": io.read_bytes if io is not None else None,
"io_write_bytes": io.write_bytes if io is not None else None,
"voluntary_ctxsw": status.voluntary_ctxsw if status is not None else None,
"involuntary_ctxsw": status.involuntary_ctxsw if status is not None else None,
"minor_faults": stat.minflt,
"major_faults": stat.majflt,
}
def run_loop(
pid: int,
output_path: Path,
t_mono_origin_ns: int,
interval_ms: int,
stop_event: threading.Event,
*,
proc_root: str = "/proc",
) -> int:
"""Sample at a fixed cadence until stop_event is set or pid disappears.
Returns the number of rows written.
"""
interval_ns = interval_ms * 1_000_000
next_tick = time.monotonic_ns()
rows = 0
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("a", buffering=1) as f: # line-buffered
while not stop_event.is_set():
row = collect_once(pid, t_mono_origin_ns, proc_root=proc_root)
if row is None:
log.info("target pid %d disappeared; stopping", pid)
break
f.write(json.dumps(row) + "\n")
rows += 1
next_tick += interval_ns
sleep_ns = next_tick - time.monotonic_ns()
if sleep_ns > 0:
# Use the event's wait so SIGTERM/stop is responsive.
stop_event.wait(sleep_ns / 1_000_000_000)
else:
# We are behind schedule; resync.
next_tick = time.monotonic_ns()
return rows

0
orchestrator/__init__.py Normal file
View file

70
orchestrator/__main__.py Normal file
View file

@ -0,0 +1,70 @@
"""CLI for the v0 orchestrator: observe a pid for a fixed window."""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
from .episode import EpisodeConfig, EpisodeRunner
def main() -> int:
parser = argparse.ArgumentParser(
prog="cis490-orchestrator",
description="Run a single episode against a target pid.",
)
parser.add_argument(
"--target-pid",
type=int,
required=True,
help="pid to sample (later this will be the qemu-system pid)",
)
parser.add_argument(
"--duration",
type=float,
default=10.0,
help="seconds to observe (default 10)",
)
parser.add_argument(
"--interval-ms",
type=int,
default=100,
help="sampling interval (default 100ms = 10 Hz)",
)
parser.add_argument(
"--data-root",
default="data",
help="output directory root (default ./data)",
)
parser.add_argument(
"--episode-id",
default=None,
help="override ULID generation (mostly for tests)",
)
args = parser.parse_args()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(name)s %(message)s",
)
cfg = EpisodeConfig(
target_pid=args.target_pid,
duration_s=args.duration,
interval_ms=args.interval_ms,
data_root=Path(args.data_root),
episode_id=args.episode_id,
)
result = EpisodeRunner(cfg).run()
print(f"episode_id={result.episode_id}")
print(f"path={result.episode_dir}")
print(f"rows_proc={result.rows_proc}")
print(f"duration_observed_s={result.duration_observed_s:.2f}")
return 0 if not result.pid_disappeared else 1
if __name__ == "__main__":
sys.exit(main())

207
orchestrator/episode.py Normal file
View file

@ -0,0 +1,207 @@
"""EpisodeRunner — minimum-viable single-episode driver.
This v0 runner does NOT boot a VM yet. It samples a target pid that you
provide on the command line, labels the entire window as ``clean``, and
writes the full per-episode directory shape from ``docs/data-model.md``:
data/episodes/<ulid>/
meta.json
events.jsonl
labels.jsonl
telemetry-proc.jsonl
done.marker
The point of v0 is to validate the directory shape, the JSONL schemas, the
collector loop, and (next step) the shipper, *before* the VM lab is wired
up. Once the VM bring-up exists, the runner will be extended with phase
transitions (armed infecting infected_running dormant).
"""
from __future__ import annotations
import json
import logging
import os
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from collectors import proc_qemu
from .ulid import new_ulid
log = logging.getLogger("cis490.orchestrator")
SCHEMA_VERSION = 1
@dataclass
class EpisodeConfig:
target_pid: int
duration_s: float
data_root: Path
interval_ms: int = 100
image_name: str = "(stub)"
snapshot_name: str = "(none-stub)"
episode_id: str | None = None
@dataclass
class EpisodeResult:
episode_id: str
episode_dir: Path
rows_proc: int
pid_disappeared: bool
duration_observed_s: float
class EpisodeRunner:
def __init__(self, cfg: EpisodeConfig) -> None:
self.cfg = cfg
self.episode_id = cfg.episode_id or new_ulid()
self.episode_dir: Path = cfg.data_root / "episodes" / self.episode_id
self._t_mono_origin_ns: int = 0
self._stop = threading.Event()
# ---- public ---------------------------------------------------------
def run(self) -> EpisodeResult:
self.episode_dir.mkdir(parents=True, exist_ok=True)
self._t_mono_origin_ns = time.monotonic_ns()
started_at_wall = datetime.now(timezone.utc).isoformat()
meta = self._initial_meta(started_at_wall)
self._write_meta(meta)
self._emit_event(0, "snapshot_load", snapshot=self.cfg.snapshot_name)
self._emit_label(0, "clean", prev=None, reason="snapshot_loaded")
rows_holder: dict[str, int] = {"rows": 0}
def _collector() -> None:
rows_holder["rows"] = proc_qemu.run_loop(
pid=self.cfg.target_pid,
output_path=self.episode_dir / "telemetry-proc.jsonl",
t_mono_origin_ns=self._t_mono_origin_ns,
interval_ms=self.cfg.interval_ms,
stop_event=self._stop,
)
t = threading.Thread(target=_collector, daemon=True, name="proc_qemu")
t.start()
try:
# Wait either for duration to elapse or for stop to be set.
self._stop.wait(timeout=self.cfg.duration_s)
finally:
self._stop.set()
t.join(timeout=2.0)
end_mono_ns = time.monotonic_ns() - self._t_mono_origin_ns
pid_alive = _pid_alive(self.cfg.target_pid)
self._emit_event(
end_mono_ns,
"episode_end",
target_pid_alive=pid_alive,
)
meta["ended_at_wall"] = datetime.now(timezone.utc).isoformat()
meta["result"] = {
"phases_observed": ["clean"],
"rows_proc": rows_holder["rows"],
"pid_alive_at_end": pid_alive,
"duration_observed_s": end_mono_ns / 1_000_000_000,
}
self._write_meta(meta)
(self.episode_dir / "done.marker").touch()
log.info(
"episode %s complete: rows=%d duration=%.2fs",
self.episode_id, rows_holder["rows"], end_mono_ns / 1e9,
)
return EpisodeResult(
episode_id=self.episode_id,
episode_dir=self.episode_dir,
rows_proc=rows_holder["rows"],
pid_disappeared=not pid_alive,
duration_observed_s=end_mono_ns / 1_000_000_000,
)
def stop(self) -> None:
self._stop.set()
# ---- internals ------------------------------------------------------
def _initial_meta(self, started_at_wall: str) -> dict:
return {
"episode_id": self.episode_id,
"schema_version": SCHEMA_VERSION,
"started_at_wall": started_at_wall,
"ended_at_wall": None,
"host_fingerprint": {
"kernel": os.uname().release,
"qemu_version": None,
},
"vm": {
"image_name": self.cfg.image_name,
"image_sha256": None,
"snapshot_name": self.cfg.snapshot_name,
"vcpus": None,
"ram_mib": None,
"target_pid": self.cfg.target_pid,
},
"exploit": None,
"sample": None,
"schedule": {
"baseline_seconds": self.cfg.duration_s,
"infected_seconds": 0,
"dormant_seconds": 0,
"interval_ms": self.cfg.interval_ms,
},
"result": None,
}
def _write_meta(self, meta: dict) -> None:
path = self.episode_dir / "meta.json"
tmp = path.with_suffix(".json.partial")
with tmp.open("w") as f:
json.dump(meta, f, indent=2, sort_keys=True)
f.write("\n")
os.replace(tmp, path)
def _emit_event(self, t_mono_ns: int, event: str, **extra) -> None:
row = {
"t_mono_ns": t_mono_ns,
"t_wall_ns": time.time_ns(),
"event": event,
**extra,
}
with (self.episode_dir / "events.jsonl").open("a") as f:
f.write(json.dumps(row, sort_keys=True) + "\n")
def _emit_label(self, t_mono_ns: int, phase: str, prev: str | None, reason: str) -> None:
row = {
"t_mono_ns": t_mono_ns,
"t_wall_ns": time.time_ns(),
"phase": phase,
"prev": prev,
"reason": reason,
}
with (self.episode_dir / "labels.jsonl").open("a") as f:
f.write(json.dumps(row, sort_keys=True) + "\n")
def _pid_alive(pid: int) -> bool:
try:
os.kill(pid, 0)
return True
except (ProcessLookupError, PermissionError):
# PermissionError means the pid exists but we can't signal it.
return isinstance(_last_exception(), PermissionError)
def _last_exception() -> BaseException | None:
import sys
return sys.exc_info()[1]

21
orchestrator/ulid.py Normal file
View file

@ -0,0 +1,21 @@
"""Tiny ULID generator. 26-char Crockford base32, time-sortable."""
from __future__ import annotations
import os
import time
_CROCKFORD = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
def new_ulid(now_ms: int | None = None) -> str:
"""Return a fresh ULID. ``now_ms`` is overridable for tests."""
ms = (now_ms if now_ms is not None else int(time.time() * 1000)) & ((1 << 48) - 1)
raw = ms.to_bytes(6, "big") + os.urandom(10)
n = int.from_bytes(raw, "big")
out = []
for _ in range(26):
out.append(_CROCKFORD[n & 31])
n >>= 5
return "".join(reversed(out))

88
tests/test_episode.py Normal file
View file

@ -0,0 +1,88 @@
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from orchestrator.episode import EpisodeConfig, EpisodeRunner
def _read_jsonl(p: Path) -> list[dict]:
return [json.loads(l) for l in p.read_text().splitlines()]
def test_episode_against_self_pid_produces_full_directory(tmp_path: Path) -> None:
cfg = EpisodeConfig(
target_pid=os.getpid(),
duration_s=0.5,
interval_ms=50,
data_root=tmp_path,
)
result = EpisodeRunner(cfg).run()
d = result.episode_dir
assert d.exists()
assert (d / "meta.json").exists()
assert (d / "events.jsonl").exists()
assert (d / "labels.jsonl").exists()
assert (d / "telemetry-proc.jsonl").exists()
assert (d / "done.marker").exists()
# meta.json structure
meta = json.loads((d / "meta.json").read_text())
assert meta["episode_id"] == result.episode_id
assert meta["schema_version"] == 1
assert meta["started_at_wall"] is not None
assert meta["ended_at_wall"] is not None
assert meta["vm"]["target_pid"] == os.getpid()
assert meta["schedule"]["baseline_seconds"] == 0.5
assert meta["schedule"]["interval_ms"] == 50
assert meta["result"]["rows_proc"] == result.rows_proc
assert "clean" in meta["result"]["phases_observed"]
# labels.jsonl: at least one clean label at t=0.
labels = _read_jsonl(d / "labels.jsonl")
assert any(r["phase"] == "clean" and r["t_mono_ns"] == 0 for r in labels)
# events.jsonl: snapshot_load + episode_end.
events = _read_jsonl(d / "events.jsonl")
event_names = [e["event"] for e in events]
assert "snapshot_load" in event_names
assert "episode_end" in event_names
# telemetry-proc.jsonl: roughly 10 ticks @ 50ms over 500ms.
proc_rows = _read_jsonl(d / "telemetry-proc.jsonl")
assert len(proc_rows) >= 5
for row in proc_rows:
assert row["source"] == "host_proc"
assert row["available_in_deployment"] is False
assert row["rss_bytes"] > 0
def test_episode_id_can_be_overridden(tmp_path: Path) -> None:
cfg = EpisodeConfig(
target_pid=os.getpid(),
duration_s=0.1,
interval_ms=50,
data_root=tmp_path,
episode_id="01TEST",
)
result = EpisodeRunner(cfg).run()
assert result.episode_id == "01TEST"
assert result.episode_dir == tmp_path / "episodes" / "01TEST"
def test_episode_writes_done_marker_last(tmp_path: Path) -> None:
"""done.marker should not appear until meta.json has ended_at_wall set."""
cfg = EpisodeConfig(
target_pid=os.getpid(),
duration_s=0.1,
interval_ms=50,
data_root=tmp_path,
)
result = EpisodeRunner(cfg).run()
assert (result.episode_dir / "done.marker").exists()
meta = json.loads((result.episode_dir / "meta.json").read_text())
assert meta["ended_at_wall"] is not None

185
tests/test_proc_qemu.py Normal file
View file

@ -0,0 +1,185 @@
from __future__ import annotations
import json
import os
import threading
import time
from pathlib import Path
import pytest
from collectors.proc_qemu import (
PAGE_SIZE,
collect_once,
parse_proc_io,
parse_proc_stat,
parse_proc_status,
run_loop,
)
# A real-ish /proc/<pid>/stat line where comm contains both spaces and parens.
SAMPLE_STAT = (
"1234 (weird (comm) name) S 1 1234 1234 0 -1 4194304 "
"412 0 3 0 " # minflt cminflt majflt cmajflt
"142 38 0 0 " # utime stime cutime cstime
"20 0 1 0 " # priority nice num_threads itrealvalue
"100000000 " # starttime
"1842933760 " # vsize
"132453 " # rss (pages)
"18446744073709551615 1 1 0 0 0 0 0 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"
)
SAMPLE_IO = (
"rchar: 12345\n"
"wchar: 4096\n"
"syscr: 100\n"
"syscw: 50\n"
"read_bytes: 8192\n"
"write_bytes: 4096\n"
"cancelled_write_bytes: 0\n"
)
SAMPLE_STATUS = """\
Name: bash
Umask: 0022
State: S (sleeping)
Tgid: 1234
VmPeak: 12345 kB
VmSize: 10000 kB
VmRSS: 1024 kB
voluntary_ctxt_switches: 12
nonvoluntary_ctxt_switches: 3
"""
def test_parse_proc_stat_handles_parens_in_comm() -> None:
s = parse_proc_stat(SAMPLE_STAT)
assert s.minflt == 412
assert s.majflt == 3
assert s.utime == 142
assert s.stime == 38
assert s.vsize == 1842933760
assert s.rss_pages == 132453
def test_parse_proc_io() -> None:
io = parse_proc_io(SAMPLE_IO)
assert io.read_bytes == 8192
assert io.write_bytes == 4096
def test_parse_proc_status() -> None:
s = parse_proc_status(SAMPLE_STATUS)
assert s.voluntary_ctxsw == 12
assert s.involuntary_ctxsw == 3
def _write_synthetic_proc(root: Path, pid: int, *, with_io: bool = True) -> None:
pdir = root / str(pid)
pdir.mkdir(parents=True, exist_ok=True)
(pdir / "stat").write_text(SAMPLE_STAT)
(pdir / "status").write_text(SAMPLE_STATUS)
if with_io:
(pdir / "io").write_text(SAMPLE_IO)
def test_collect_once_against_synthetic_proc(tmp_path: Path) -> None:
_write_synthetic_proc(tmp_path, 1234)
row = collect_once(1234, t_mono_origin_ns=0, proc_root=str(tmp_path))
assert row is not None
assert row["source"] == "host_proc"
assert row["available_in_deployment"] is False
assert row["cpu_user_jiffies"] == 142
assert row["cpu_sys_jiffies"] == 38
assert row["rss_bytes"] == 132453 * PAGE_SIZE
assert row["vsize_bytes"] == 1842933760
assert row["io_read_bytes"] == 8192
assert row["io_write_bytes"] == 4096
assert row["voluntary_ctxsw"] == 12
assert row["involuntary_ctxsw"] == 3
assert row["minor_faults"] == 412
assert row["major_faults"] == 3
assert row["t_mono_ns"] >= 0
assert row["t_wall_ns"] > 0
def test_collect_once_handles_missing_io(tmp_path: Path) -> None:
_write_synthetic_proc(tmp_path, 1234, with_io=False)
row = collect_once(1234, t_mono_origin_ns=0, proc_root=str(tmp_path))
assert row is not None
assert row["io_read_bytes"] is None
assert row["io_write_bytes"] is None
def test_collect_once_returns_none_for_missing_pid(tmp_path: Path) -> None:
row = collect_once(99999, t_mono_origin_ns=0, proc_root=str(tmp_path))
assert row is None
def test_collect_once_against_real_self() -> None:
"""Smoke: read our own /proc entry. Most fields will be present."""
row = collect_once(os.getpid(), t_mono_origin_ns=time.monotonic_ns())
assert row is not None
assert row["source"] == "host_proc"
assert row["rss_bytes"] > 0
assert row["vsize_bytes"] > 0
def test_run_loop_writes_rows_until_stopped(tmp_path: Path) -> None:
_write_synthetic_proc(tmp_path, 1234)
out = tmp_path / "telemetry-proc.jsonl"
stop = threading.Event()
def stopper():
time.sleep(0.25)
stop.set()
t = threading.Thread(target=stopper)
t.start()
rows = run_loop(
pid=1234,
output_path=out,
t_mono_origin_ns=time.monotonic_ns(),
interval_ms=50,
stop_event=stop,
proc_root=str(tmp_path),
)
t.join()
assert rows >= 3 # roughly 5 ticks @ 50ms over 250ms
lines = out.read_text().splitlines()
assert len(lines) == rows
# Each line is valid JSON with the expected schema.
for line in lines:
row = json.loads(line)
assert row["source"] == "host_proc"
assert row["available_in_deployment"] is False
assert row["cpu_user_jiffies"] == 142
def test_run_loop_stops_when_pid_disappears(tmp_path: Path) -> None:
_write_synthetic_proc(tmp_path, 1234)
out = tmp_path / "telemetry-proc.jsonl"
stop = threading.Event()
def vanish():
time.sleep(0.1)
# Delete the synthetic /proc/<pid>/ to simulate exit.
for f in (tmp_path / "1234").iterdir():
f.unlink()
(tmp_path / "1234").rmdir()
t = threading.Thread(target=vanish)
t.start()
rows = run_loop(
pid=1234,
output_path=out,
t_mono_origin_ns=time.monotonic_ns(),
interval_ms=20,
stop_event=stop,
proc_root=str(tmp_path),
)
t.join()
assert rows >= 1
# Loop terminated even though stop was never set.
assert not stop.is_set()

20
tests/test_ulid.py Normal file
View file

@ -0,0 +1,20 @@
from __future__ import annotations
from orchestrator.ulid import new_ulid
def test_ulid_length_and_alphabet() -> None:
u = new_ulid()
assert len(u) == 26
assert all(c in "0123456789ABCDEFGHJKMNPQRSTVWXYZ" for c in u)
def test_ulid_uniqueness_burst() -> None:
seen = {new_ulid() for _ in range(2000)}
assert len(seen) == 2000
def test_ulid_time_sortable() -> None:
earlier = new_ulid(now_ms=1_700_000_000_000)
later = new_ulid(now_ms=1_700_000_001_000)
assert earlier < later