From a04bba62817f01602f0f8b3d297977dcd414b589 Mon Sep 17 00:00:00 2001 From: Max Gorog Date: Fri, 8 May 2026 01:16:54 -0500 Subject: [PATCH] =?UTF-8?q?training/dashboard:=20click=20a=20db=20row=20?= =?UTF-8?q?=E2=86=92=20render=20the=20episode=20envelope?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New endpoint GET /api/episode// in app.py. Stream-decompresses the tarball (zstd -dc piped into tarfile), extracts telemetry-proc.jsonl, labels.jsonl, and meta.json, returns the parsed contents. Synchronous extract runs in asyncio.to_thread so the event loop isn't blocked. Frontend: clicking a row in the database explorer now fetches the episode and draws an SVG chart matching the README's Real Alpine VM envelope shape: - per-interval CPU jiffies delta (user + sys) - per-interval IO bytes delta (read + write) - colored phase bands (clean/armed/infecting/infected_running/ dormant) overlaid by labels.jsonl - axis ticks for 0-peak on Y, 0-totalDuration in seconds on X - legend below the chart with palette-driven swatches The detail panel that previously showed the row JSON now shows metadata + the chart + the legend. Validated end-to-end against a real episode (863 samples, 8 labels) extracted from /var/lib/cis490/episodes/elliott-thinkpad/. --- training/dashboard/app.py | 106 +++++++++++++++++++ training/dashboard/static/dashboard.css | 55 +++++++++- training/dashboard/static/dashboard.js | 135 ++++++++++++++++++++++-- training/dashboard/static/index.html | 11 +- 4 files changed, 292 insertions(+), 15 deletions(-) diff --git a/training/dashboard/app.py b/training/dashboard/app.py index 6091b05..6087aae 100644 --- a/training/dashboard/app.py +++ b/training/dashboard/app.py @@ -3,6 +3,9 @@ from __future__ import annotations import asyncio import json import logging +import re +import subprocess +import tarfile from contextlib import asynccontextmanager from pathlib import Path from typing import Any @@ -19,6 +22,94 @@ log = logging.getLogger("cis490.dashboard") STATIC_DIR = Path(__file__).parent / "static" +# Used to validate URL-supplied host_id / episode_id before they +# reach the filesystem. Allows the alphanumeric ULID episode IDs +# the orchestrator produces and reasonable host names. Anything +# with `..`, `/`, or other path-traversal characters is rejected. +SAFE_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,128}$") + + +def _load_episode_sync( + data_root: Path, host_id: str, episode_id: str +) -> dict[str, Any] | None: + """Stream-decompress an episode tarball and parse the JSONL files + inside it. Returns ``None`` if the episode doesn't exist or the + IDs are unsafe. Synchronous; the route wraps this in + ``asyncio.to_thread`` so the event loop isn't blocked by the + decompress + parse.""" + if not (SAFE_ID_RE.match(host_id) and SAFE_ID_RE.match(episode_id)): + return None + path = data_root / "episodes" / host_id / f"{episode_id}.tar.zst" + if not path.is_file(): + return None + + samples: list[dict] = [] + labels: list[dict] = [] + meta: dict | None = None + + proc = subprocess.Popen( + ["zstd", "-dc", str(path)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + try: + with tarfile.open(fileobj=proc.stdout, mode="r|") as tar: + for member in tar: + if not member.isfile(): + continue + name = member.name.rsplit("/", 1)[-1] + if name not in ("telemetry-proc.jsonl", + "labels.jsonl", + "meta.json"): + continue + f = tar.extractfile(member) + if f is None: + continue + data = f.read() + if name == "telemetry-proc.jsonl": + for line in data.splitlines(): + line = line.strip() + if not line: + continue + try: + samples.append(json.loads(line)) + except json.JSONDecodeError: + pass + elif name == "labels.jsonl": + for line in data.splitlines(): + line = line.strip() + if not line: + continue + try: + labels.append(json.loads(line)) + except json.JSONDecodeError: + pass + elif name == "meta.json": + try: + meta = json.loads(data) + except json.JSONDecodeError: + pass + finally: + if proc.stdout: + proc.stdout.close() + rc = proc.wait() + if rc != 0: + try: + err = proc.stderr.read().decode("utf-8", errors="replace") + log.warning("zstd %s exit %d: %s", path, rc, err[:200]) + except Exception: + pass + if proc.stderr: + proc.stderr.close() + + return { + "host_id": host_id, + "episode_id": episode_id, + "samples": samples, + "labels": labels, + "meta": meta, + } + class Broadcaster: """Tiny fan-out hub. Per-client async queues, oldest-message-drop @@ -133,10 +224,25 @@ def make_app( finally: await broadcaster.unregister(q) + async def episode(request: Request) -> JSONResponse: + host_id = request.path_params["host_id"] + episode_id = request.path_params["episode_id"] + try: + result = await asyncio.to_thread( + _load_episode_sync, data_root, host_id, episode_id + ) + except Exception: + log.exception("episode load failed for %s/%s", host_id, episode_id) + return JSONResponse({"error": "load failed"}, status_code=500) + if result is None: + return JSONResponse({"error": "episode not found"}, status_code=404) + return JSONResponse(result) + routes = [ Route("/", index, methods=["GET"]), Route("/healthz", healthz, methods=["GET"]), Route("/publish", publish, methods=["POST"]), + Route("/api/episode/{host_id}/{episode_id}", episode, methods=["GET"]), WebSocketRoute("/ws", ws_endpoint), Mount("/static", app=StaticFiles(directory=str(STATIC_DIR)), name="static"), ] diff --git a/training/dashboard/static/dashboard.css b/training/dashboard/static/dashboard.css index f25ecee..ffc6128 100644 --- a/training/dashboard/static/dashboard.css +++ b/training/dashboard/static/dashboard.css @@ -718,15 +718,60 @@ html, body { overflow-anchor: none; } .db-host { color: var(--fg); } .db-id { color: var(--fg-dim); } .db-detail { - max-height: clamp(180px, 30vh, 360px); overflow: auto; border: 1px solid var(--line); border-radius: 4px; background: var(--bg-elev); + overflow: hidden; + display: flex; flex-direction: column; } -.db-detail pre { - margin: 0; padding: 14px 18px; +.db-detail[hidden] { display: none; } +.db-detail-meta { + padding: 8px 14px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; - font-size: 12px; line-height: 1.5; color: var(--fg); - white-space: pre-wrap; word-break: break-all; + font-size: 12px; color: var(--fg-dim); + border-bottom: 1px solid var(--line-soft); + display: flex; gap: 14px; flex-wrap: wrap; +} +.db-detail-meta .db-id { color: var(--fg); } +.db-detail-chart-wrap { + background: var(--bg-elev2); + width: 100%; + position: relative; +} +.db-detail-chart { + display: block; + width: 100%; + height: clamp(220px, 32vh, 420px); +} +.db-detail-chart .axis { stroke: var(--line); stroke-width: 1; } +.db-detail-chart .tick { + fill: var(--fg-mute); font-size: 10px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} +.db-detail-chart .metric-line { + fill: none; stroke-width: 1.5; + vector-effect: non-scaling-stroke; +} +.db-detail-chart .phase-band { opacity: 0.18; } +.db-detail-chart .phase-band.clean { fill: var(--phase-clean); } +.db-detail-chart .phase-band.armed { fill: var(--phase-armed); } +.db-detail-chart .phase-band.infecting { fill: var(--phase-infecting); } +.db-detail-chart .phase-band.infected_running { fill: var(--phase-running); } +.db-detail-chart .phase-band.dormant { fill: var(--phase-dormant); } +.db-detail-chart .placeholder { + fill: var(--fg-mute); font-size: 12px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} +.db-detail-legend { + display: flex; flex-wrap: wrap; gap: 14px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 11px; color: var(--fg-dim); + padding: 8px 14px; + border-top: 1px solid var(--line-soft); +} +.db-detail-legend > span { display: inline-flex; align-items: center; } +.db-detail-legend .swatch { + display: inline-block; width: 10px; height: 10px; + border-radius: 2px; margin-right: 6px; vertical-align: middle; } /* ─── Attack envelope thumbnails ───────────────────────────────────── */ diff --git a/training/dashboard/static/dashboard.js b/training/dashboard/static/dashboard.js index 2dedf82..d122fad 100644 --- a/training/dashboard/static/dashboard.js +++ b/training/dashboard/static/dashboard.js @@ -1163,12 +1163,14 @@ for epoch in range(20): // Real-data widget. Initial population from snapshot.recent_episodes // (last 200 lines of index.jsonl). New episodes prepend live. (function () { - const tabsEl = document.getElementById('db-tabs'); - const searchEl = document.getElementById('db-search'); - const tbodyEl = document.getElementById('db-tbody'); - const detailEl = document.getElementById('db-detail'); - const detailPre = document.getElementById('db-detail-pre'); - const countEl = document.getElementById('db-count'); + const tabsEl = document.getElementById('db-tabs'); + const searchEl = document.getElementById('db-search'); + const tbodyEl = document.getElementById('db-tbody'); + const detailEl = document.getElementById('db-detail'); + const detailMeta = document.getElementById('db-detail-meta'); + const detailChart = document.getElementById('db-detail-chart'); + const detailLegend = document.getElementById('db-detail-legend'); + const countEl = document.getElementById('db-count'); let records = []; // newest first let activeHost = null; // null = all @@ -1243,15 +1245,134 @@ for epoch in range(20): tr.addEventListener('click', e => { e.stopPropagation(); detailEl.hidden = false; - detailPre.textContent = JSON.stringify(rec, null, 2); tbodyEl.querySelectorAll('.db-row').forEach(r => r.classList.remove('selected')); tr.classList.add('selected'); + showEpisode(rec); }); frag.appendChild(tr); }); tbodyEl.appendChild(frag); } + // Fetch + render the per-episode telemetry chart. Decompresses + // and parses the .tar.zst on the server (see /api/episode in + // app.py); here we compute deltas on the cumulative counters + // and draw lines + phase bands. + async function showEpisode(rec) { + detailMeta.innerHTML = ` + ${rec.host_id || '—'} + ${rec.episode_id || '—'} + ${fmtBytes(rec.size_bytes)} + ${rec.received_at || ''}`; + detailChart.innerHTML = + 'loading…'; + detailLegend.innerHTML = ''; + try { + const url = `/api/episode/${encodeURIComponent(rec.host_id)}/${encodeURIComponent(rec.episode_id)}`; + const resp = await fetch(url); + if (!resp.ok) { + if (resp.status === 404) throw new Error('episode tarball not on disk'); + throw new Error(`HTTP ${resp.status}`); + } + const data = await resp.json(); + renderEpisodeChart(data); + } catch (err) { + detailChart.innerHTML = + `${err.message}`; + } + } + + function renderEpisodeChart(data) { + const W = 1000, H = 360; + const pad = { t: 16, r: 18, b: 32, l: 56 }; + const innerW = W - pad.l - pad.r; + const innerH = H - pad.t - pad.b; + + const samples = data.samples || []; + const labels = (data.labels || []) + .filter(l => typeof l.t_mono_ns === 'number') + .sort((a, b) => a.t_mono_ns - b.t_mono_ns); + + if (samples.length < 2) { + detailChart.innerHTML = + 'no telemetry samples'; + return; + } + + const tMin = samples[0].t_mono_ns; + const tMax = samples[samples.length - 1].t_mono_ns; + const tRange = Math.max(1, tMax - tMin); + + // Per-interval deltas of the cumulative counters. The README + // envelope uses CPU jiffies (user + sys) and IO bytes (read + + // write); both are running totals in /proc, so subtracting + // adjacent samples gives instantaneous-ish rates. + const cpu = [], io = []; + for (let i = 1; i < samples.length; i++) { + const a = samples[i - 1], b = samples[i]; + if (b.t_mono_ns - a.t_mono_ns <= 0) continue; + const cv = ((b.cpu_user_jiffies || 0) - (a.cpu_user_jiffies || 0)) + + ((b.cpu_sys_jiffies || 0) - (a.cpu_sys_jiffies || 0)); + const iv = ((b.io_read_bytes || 0) - (a.io_read_bytes || 0)) + + ((b.io_write_bytes || 0) - (a.io_write_bytes || 0)); + cpu.push({ t: b.t_mono_ns, v: Math.max(0, cv) }); + io.push({ t: b.t_mono_ns, v: Math.max(0, iv) }); + } + const cpuMax = Math.max(1, ...cpu.map(p => p.v)); + const ioMax = Math.max(1, ...io.map(p => p.v)); + + const tToX = t => pad.l + ((t - tMin) / tRange) * innerW; + const cpuToY = v => pad.t + innerH - (v / cpuMax) * innerH; + const ioToY = v => pad.t + innerH - (v / ioMax) * innerH; + + const cpuPath = cpu.map((p, i) => + `${i === 0 ? 'M' : 'L'}${tToX(p.t).toFixed(1)},${cpuToY(p.v).toFixed(1)}`).join(' '); + const ioPath = io.map((p, i) => + `${i === 0 ? 'M' : 'L'}${tToX(p.t).toFixed(1)},${ioToY(p.v).toFixed(1)}`).join(' '); + + // Colored band per labeled-phase span. + const phaseBands = []; + const phasesUsed = new Set(); + for (let i = 0; i < labels.length; i++) { + const start = labels[i].t_mono_ns; + const end = i + 1 < labels.length ? labels[i + 1].t_mono_ns : tMax; + const phase = labels[i].phase; + if (!phase) continue; + phasesUsed.add(phase); + const x = tToX(Math.max(start, tMin)); + const w = tToX(Math.min(end, tMax)) - x; + if (w > 0.5) { + phaseBands.push( + ``); + } + } + + const axisY = pad.t + innerH; + const durSec = (tRange / 1e9).toFixed(1); + + detailChart.innerHTML = ` + ${phaseBands.join('')} + + + peak + 0 + 0 s + ${durSec} s + + + `; + + const phaseList = Array.from(phasesUsed).map(p => { + const v = p === 'infected_running' ? 'phase-running' : `phase-${p}`; + return `${p}`; + }).join(''); + detailLegend.innerHTML = ` + cpu jiffies / interval + io bytes / interval + ${phaseList}`; + } + on('snapshot', m => { if (Array.isArray(m.recent_episodes)) { records = m.recent_episodes.slice(); diff --git a/training/dashboard/static/index.html b/training/dashboard/static/index.html index 42d84ca..52e8583 100644 --- a/training/dashboard/static/index.html +++ b/training/dashboard/static/index.html @@ -4,7 +4,7 @@ CIS490 — live - +