Producers are event *sources* — the renderer is everything inside training/dashboard/. A sibling layout makes the dependency direction one-way (producers import from training.dashboard.events; the dashboard never reaches into producers). Move: training/dashboard/producers/ → training/producers/. Internal imports were rewritten via sed; eval_/run.py and training/README.md cross-references were updated. The CLI entry point stays `python -m training.producers.<sub>` (replay / metrics / perf / profiles). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
220 lines
7.7 KiB
Python
220 lines
7.7 KiB
Python
"""Replay an episode at wall-clock time, emitting live dashboard events.
|
|
|
|
For one episode we emit:
|
|
phase — ground truth from labels.jsonl, on each transition
|
|
prediction — per-window predicted vs actual phase from one model
|
|
(the "primary" model, default: first GBT loaded)
|
|
embedding — 2-D PCA projection of each window for the KNN scatter
|
|
|
|
Producer is transport-agnostic via _publish.PublishFn. Models are
|
|
loaded via the schema-hashed checkpoint format — schema mismatch
|
|
between training and inference fails loud, not silent.
|
|
|
|
Both summary and tensor models are supported. The producer extracts
|
|
the right input flavor per model on demand:
|
|
- summary: summary_windows(epi)
|
|
- tensor: tensor_windows(epi)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
|
|
# Make the repo root (the directory that contains the ``training`` package)
# importable when this file is executed directly. The imports below address
# this package as ``training.producers``, i.e. the file is expected at
# <root>/training/producers/<module>.py, which puts the root at parents[2]
# (parents[0] = producers/, parents[1] = training/). The previous index of 3
# matched the older, one-level-deeper training/dashboard/producers/ layout.
# NOTE(review): confirm the on-disk location if this module is moved again.
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
|
from training._episode_io import open_episode
|
|
from training._features import (
|
|
PHASE_TO_INT, summary_windows, tensor_windows,
|
|
)
|
|
from training.producers._models import (
|
|
load_models, model_display_name,
|
|
)
|
|
from training.producers._publish import (
|
|
PublishFn, http_publisher, null_publisher,
|
|
)
|
|
from training.models import BaseModel
|
|
|
|
|
|
# Module-level logger shared by every function in this file. The name is a
# fixed string rather than ``__name__``, so it stays the same regardless of
# how the module is imported or executed.
log = logging.getLogger("cis490.dashboard.producers.replay")
|
|
|
|
|
|
def _pick_primary(models: list[BaseModel]) -> BaseModel | None:
    """Choose the model whose predictions feed the chunking widget.

    Candidates are compared on a two-part key:

    1. models whose class name contains "realistic" (case-insensitive)
       come first, since that is the flavor a deployed system would run;
    2. then a fixed architecture order looked up via ``__model_name__``
       (names not in the table rank last).

    Ties keep the order of *models*. Returns ``None`` for an empty list.
    """
    if not models:
        return None

    arch_order = ("gbt", "cnn", "transformer", "gru", "lstm", "mlp")
    arch_rank = {arch: position for position, arch in enumerate(arch_order)}

    def preference(candidate: BaseModel) -> tuple[int, int]:
        class_name = str(candidate.__class__.__name__).lower()
        realistic_bucket = 0 if "realistic" in class_name else 1
        return realistic_bucket, arch_rank.get(candidate.__model_name__, 99)

    # min() yields the first minimal element, i.e. the head of a stable sort.
    return min(models, key=preference)
|
|
|
|
|
|
async def replay_episode(
    *,
    publish: PublishFn,
    episode_path: Path,
    host_id: str,
    models: list[BaseModel],
    speed: float = 1.0,
) -> None:
    """Replay one episode at wall-clock pace, publishing dashboard events.

    For every window, after sleeping until the window's time (window time
    divided by *speed*, measured from the start of the replay), this
    publishes:

    * ``{"type": "phase", ...}`` for each label whose offset from the
      first label (in seconds) is not later than the window's time;
    * ``{"type": "prediction", ...}`` with the primary model's predicted
      phase and the window's actual phase;
    * ``{"type": "embedding", ...}`` when ``_project_one`` yields a point
      for the primary model.

    The primary model is chosen by ``_pick_primary``. Window times,
    ground-truth labels and model inputs are all taken from the primary
    model's own input flavor, so index ``w`` always refers to the same
    window in each of them. Without a usable primary only phase events
    are published, paced by the tensor windows if they were built and by
    the summary windows otherwise.

    Labels later than the last window's time are not published.

    Args:
        publish: Awaitable callback that receives each event dict.
        episode_path: Episode location handed to ``open_episode``.
        host_id: Host identifier handed to ``open_episode``.
        models: Loaded models; may be empty.
        speed: Playback speed multiplier; must be greater than zero.

    Raises:
        ValueError: If *speed* is not a positive number.
    """
    # A zero speed would divide by zero in the pacing maths below and a
    # negative or NaN speed silently disables pacing; reject both up front.
    if not speed > 0:
        raise ValueError(f"speed must be a positive number, got {speed!r}")

    epi = open_episode(episode_path, host_id=host_id)
    if not epi.labels:
        log.warning("episode %s has no labels — nothing to replay", episode_path)
        return

    # Build each input flavor at most once, and only when something needs
    # it. With no models at all a window timeline is still required to
    # pace the phase events, so the summary windows are built for that.
    wanted_kinds = {m.input_kind for m in models}
    inputs: dict[str, dict] = {}
    if "summary" in wanted_kinds or not models:
        Xs, ys, ts, _ = summary_windows(epi)
        inputs["summary"] = {"X": Xs, "y": ys, "t": ts}
    if "tensor" in wanted_kinds:
        Xt, yt, tt, _, _ = tensor_windows(epi)
        inputs["tensor"] = {"X": Xt, "y": yt, "t": tt}

    primary = _pick_primary(models)
    if primary is not None and primary.input_kind not in inputs:
        # Only "summary" and "tensor" inputs are built above; without the
        # matching input the model cannot be fed.
        log.warning("primary model %s has unsupported input_kind %r; "
                    "predictions disabled",
                    model_display_name(primary), primary.input_kind)
        primary = None

    # Timeline reference: the primary's own input keeps t / y / X indexed
    # by the same windows. Otherwise prefer tensor, then summary.
    if primary is not None:
        ref = inputs[primary.input_kind]
    else:
        ref = inputs.get("tensor")
        if ref is None:
            ref = inputs.get("summary")
    if ref is None or ref["X"].shape[0] == 0:
        log.warning("no usable windows for %s", episode_path)
        return
    n_w = ref["X"].shape[0]
    t_centers = ref["t"]
    y_actual = ref["y"]
    X_ref = ref["X"]

    # Ground-truth phase events as (seconds since the first label, phase).
    t0 = int(epi.labels[0]["t_wall_ns"])
    label_events: list[tuple[float, str]] = [
        ((int(label["t_wall_ns"]) - t0) / 1e9, label["phase"])
        for label in epi.labels
    ]
    n_labels = len(label_events)

    int_to_phase = {i: p for p, i in PHASE_TO_INT.items()}
    if primary is None:
        log.info("no primary model available; emitting phase events only")

    log.info("replay start: %d windows, %d models, primary=%s",
             n_w, len(models),
             model_display_name(primary) if primary else None)

    start_wall = time.monotonic()
    label_cursor = 0

    for w in range(n_w):
        t_window = float(t_centers[w])

        # Pace against the absolute replay start so that time spent in
        # predict/publish does not accumulate as drift.
        delay = start_wall + t_window / speed - time.monotonic()
        if delay > 0:
            await asyncio.sleep(delay)

        # Phase events for every label transition whose time has passed.
        while (label_cursor < n_labels
               and label_events[label_cursor][0] <= t_window):
            await publish({"type": "phase",
                           "phase": label_events[label_cursor][1]})
            label_cursor += 1

        if primary is None:
            continue

        actual_name = int_to_phase.get(int(y_actual[w]), "clean")
        X_one = X_ref[w:w + 1]

        # Only the primary's prediction goes to the chunking widget.
        try:
            pred = int(primary.predict(X_one)[0])
            pred_name = int_to_phase.get(pred, "clean")
        except Exception as e:
            # Best-effort boundary: one failing window must not abort the
            # replay. NOTE(review): the fallback reports predicted ==
            # actual, which reads as a correct prediction downstream.
            log.warning("predict failed: %s", e)
            pred_name = actual_name
        await publish({
            "type": "prediction",
            "episode_id": epi.episode_id,
            "window_idx": w,
            "predicted": pred_name,
            "actual": actual_name,
            "model": primary.__model_name__,
        })

        # Embedding: skipped for this window when the primary has no
        # usable projection (_project_one returns None).
        xy = _project_one(primary, X_one)
        if xy is not None:
            await publish({
                "type": "embedding",
                "x": float(xy[0]), "y": float(xy[1]),
                "phase": actual_name,
            })
|
|
|
|
|
|
def _project_one(model: BaseModel, X_one: np.ndarray) -> tuple[float, float] | None:
    """Project one window to a 2-D point in the open unit square.

    The first row of *X_one* is passed through ``model.select``; a 3-D
    result is flattened to a single row, multiplied by the model's
    ``_pca_proj`` matrix, and each of the first two components is
    squashed with ``0.5 + 0.5 * tanh(0.05 * value)``. The squash is
    stateless: no statistics are fit or stored.

    Returns ``None`` (no embedding for this window) when:

    * the model has no ``_pca_proj`` attribute, or it is ``None``;
    * *X_one* holds no rows;
    * the selected features are not one 2-D row whose width matches the
      projection's row count;
    * the projection yields fewer than two components.
    """
    pca = getattr(model, "_pca_proj", None)
    if pca is None:
        return None

    Xk = model.select(X_one[:1])
    if Xk.size == 0:
        # Empty slice (e.g. a window index past the end): nothing to project.
        return None
    if Xk.ndim == 3:
        Xk = Xk.reshape(1, -1)
    if Xk.ndim != 2 or Xk.shape[1] != pca.shape[0]:
        return None

    p = (Xk @ pca).ravel()
    if p.size < 2:
        return None

    # tanh keeps every point strictly inside (0, 1) without needing
    # train-time min/max statistics.
    return (
        0.5 + 0.5 * float(np.tanh(0.05 * p[0])),
        0.5 + 0.5 * float(np.tanh(0.05 * p[1])),
    )
|
|
|
|
|
|
async def _run(args: argparse.Namespace) -> int:
    """Load models, attach their PCA projections, and replay the episode.

    Steps:

    1. configure root logging at INFO;
    2. load models from ``args.artifacts`` on ``args.device``;
    3. for each ``*.ckpt.json`` file in ``args.artifacts`` (sorted by
       path), read its header and, if it carries ``pca_proj``, store it
       on the model at the same position as ``_pca_proj`` (float32);
    4. replay ``args.episode`` through a null publisher when
       ``args.dry_run`` is set, otherwise through an HTTP publisher
       targeting ``args.publish_url``.

    Returns 0 once the replay has finished.
    """
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s %(levelname)s %(name)s %(message)s")

    models = load_models(args.artifacts, device=args.device)

    # Hydrate the PCA projection from each checkpoint header.
    from training.models._checkpoint import load_header

    ckpt_paths = sorted(Path(args.artifacts).glob("*.ckpt.json"))
    if len(ckpt_paths) != len(models):
        # Models and headers are paired purely by position and zip() stops
        # at the shorter sequence, so a count mismatch means some pairing
        # is missing or possibly wrong. Surface it instead of staying silent.
        # NOTE(review): assumes load_models returns models in sorted
        # checkpoint-path order — confirm against its implementation.
        log.warning(
            "loaded %d models but found %d checkpoint headers in %s; "
            "PCA projections are paired by position and may be misassigned",
            len(models), len(ckpt_paths), args.artifacts,
        )
    for model, ckpt_path in zip(models, ckpt_paths):
        pca_proj = load_header(ckpt_path).get("pca_proj")
        if pca_proj is not None:
            model._pca_proj = np.asarray(pca_proj, dtype=np.float32)

    publisher = (null_publisher() if args.dry_run
                 else http_publisher(args.publish_url))
    await replay_episode(
        publish=publisher, episode_path=args.episode,
        host_id=args.host_id, models=models, speed=args.speed,
    )
    return 0
|
|
|
|
|
|
def main() -> int:
    """Parse command-line arguments and run the replay.

    ``--speed`` must be strictly positive; zero, negative or NaN values
    are rejected by the argument parser (usage error, exit status 2)
    before any model or episode is loaded.

    Returns the status produced by ``_run``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--episode", required=True, type=Path,
                    help="episode to replay")
    ap.add_argument("--host-id", required=True,
                    help="host identifier used when opening the episode")
    ap.add_argument("--artifacts", type=Path, default=Path("artifacts"),
                    help="directory holding the model checkpoints")
    ap.add_argument("--publish-url", default="http://127.0.0.1:8447/publish",
                    help="HTTP endpoint that receives the events")
    ap.add_argument("--speed", type=float, default=1.0,
                    help="playback speed multiplier, must be > 0")
    ap.add_argument("--device", default="auto",
                    help="device passed to the model loader")
    ap.add_argument("--dry-run", action="store_true",
                    help="use the null publisher instead of HTTP")
    args = ap.parse_args()

    # Window times are divided by the speed, so zero cannot be replayed and
    # a negative or NaN value would silently disable pacing.
    if not args.speed > 0:
        ap.error(f"--speed must be > 0 (got {args.speed})")

    return asyncio.run(_run(args))
|
|
|
|
|
|
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|