End-to-end: ``python -m orchestrator --target-pid <pid> --duration N`` now
writes a complete episode directory matching docs/data-model.md, with phase
labels, events, and a 10 Hz host /proc telemetry stream. No VM yet — pid is
arbitrary so we can validate the loop against e.g. ``sleep 5`` while the lab
side comes up.
collectors/proc_qemu.py — parses /proc/<pid>/{stat,io,status} (handling
parentheses in comm) and provides a single-shot collect_once() plus a
stop-event-driven run_loop() that ticks at a fixed cadence and exits when
the pid disappears. Tagged ``available_in_deployment: false`` per the
threat-model doc.
orchestrator/episode.py — EpisodeRunner: creates data/episodes/<ulid>/,
atomic meta.json, events.jsonl + labels.jsonl writers, drives the collector
in a thread for duration_s, writes done.marker last so the shipper never
sees a half-finished episode.
orchestrator/ulid.py — tiny 26-char Crockford-base32 ULID generator.
Time-sortable, no third-party dep.
orchestrator/__main__.py — CLI entry point.
Tests (15 new, 28 total green):
- proc_qemu: real-ish stat with parens-in-comm, missing /proc/<pid>/io,
missing pid, run_loop cadence, run_loop terminates when pid disappears.
- episode: full directory shape against os.getpid(), id override,
done.marker written after meta.json finalize.
- ulid: length+alphabet, 2000-burst uniqueness, time-sortability.
Smoke-tested against ``sleep 10``: 16 rows over 1.5s at 100ms cadence,
monotonic clock, RSS stable at ~3.5 MiB as expected for an idle sleep.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
70 lines
1.8 KiB
Python
70 lines
1.8 KiB
Python
"""CLI for the v0 orchestrator: observe a pid for a fixed window."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from .episode import EpisodeConfig, EpisodeRunner
|
|
|
|
|
|
def main() -> int:
|
|
parser = argparse.ArgumentParser(
|
|
prog="cis490-orchestrator",
|
|
description="Run a single episode against a target pid.",
|
|
)
|
|
parser.add_argument(
|
|
"--target-pid",
|
|
type=int,
|
|
required=True,
|
|
help="pid to sample (later this will be the qemu-system pid)",
|
|
)
|
|
parser.add_argument(
|
|
"--duration",
|
|
type=float,
|
|
default=10.0,
|
|
help="seconds to observe (default 10)",
|
|
)
|
|
parser.add_argument(
|
|
"--interval-ms",
|
|
type=int,
|
|
default=100,
|
|
help="sampling interval (default 100ms = 10 Hz)",
|
|
)
|
|
parser.add_argument(
|
|
"--data-root",
|
|
default="data",
|
|
help="output directory root (default ./data)",
|
|
)
|
|
parser.add_argument(
|
|
"--episode-id",
|
|
default=None,
|
|
help="override ULID generation (mostly for tests)",
|
|
)
|
|
args = parser.parse_args()
|
|
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format="%(asctime)s %(levelname)s %(name)s %(message)s",
|
|
)
|
|
|
|
cfg = EpisodeConfig(
|
|
target_pid=args.target_pid,
|
|
duration_s=args.duration,
|
|
interval_ms=args.interval_ms,
|
|
data_root=Path(args.data_root),
|
|
episode_id=args.episode_id,
|
|
)
|
|
result = EpisodeRunner(cfg).run()
|
|
|
|
print(f"episode_id={result.episode_id}")
|
|
print(f"path={result.episode_dir}")
|
|
print(f"rows_proc={result.rows_proc}")
|
|
print(f"duration_observed_s={result.duration_observed_s:.2f}")
|
|
return 0 if not result.pid_disappeared else 1
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|