code cards: mirror the actual training stack and trainer loop
The stack scene's pyproject snippet was missing the `training` group (torch, sklearn, xgboost, zstandard) — the libraries that do the actual model work. It has been updated to match the real pyproject.toml.

The receiver snippet now ends at _bearer_check(...) instead of the import block alone, which gives the slide a non-trivial line of code to read.

The training-code scene replaces the toy "PhaseLSTM" hand-rolled loop with the real LSTM model class (registry-decorated _SeqBase subclass + _LSTMClassifier wrapping nn.LSTM with last-step classification head) and adds a second card showing the actual train_nn loop: AMP autocast/scaler, cosine LR with linear warmup, inverse-frequency class weights, gradient clipping, macro-F1 on val, early stop with best-state restore.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c1c8e98180
commit
da0e9ce83c
2 changed files with 141 additions and 42 deletions
|
|
@ -847,6 +847,7 @@
|
|||
(function () {
|
||||
const PYPROJECT = `[project]
|
||||
name = "cis490"
|
||||
version = "0.0.1"
|
||||
description = "CIS490 behavioral malware detection — dataset, transport, training"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
|
|
@ -857,24 +858,26 @@ dependencies = [
|
|||
]
|
||||
|
||||
[dependency-groups]
|
||||
training = [
|
||||
"pyarrow>=15", "polars>=1.0",
|
||||
"numpy>=1.26", "scipy>=1.11",
|
||||
"scikit-learn>=1.4", # KNN, KMeans, PCA, metrics
|
||||
"xgboost>=2.0", # gradient-boosted trees baseline
|
||||
"torch>=2.2", # LSTM / GRU / RNN / CNN / Transformer
|
||||
"zstandard>=0.22", # episode tarball streaming
|
||||
]
|
||||
dev = [
|
||||
"pytest>=8",
|
||||
"pytest-asyncio>=0.23",
|
||||
"httpx>=0.27",
|
||||
"paramiko>=3", # SSH client for in-guest control on images that support it
|
||||
"pytest>=8", "pytest-asyncio>=0.23",
|
||||
"httpx>=0.27", "paramiko>=3",
|
||||
"matplotlib>=3.8", "tornado>=6",
|
||||
]
|
||||
`;
|
||||
const RECEIVER = `from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import secrets
|
||||
import time
|
||||
import json, logging, secrets, time
|
||||
from pathlib import Path
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from starlette.applications import Starlette
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import JSONResponse, Response
|
||||
from starlette.routing import Route
|
||||
|
||||
|
|
@ -882,6 +885,19 @@ from .store import EpisodeStore, is_valid_id
|
|||
from .version_gate import VersionGate
|
||||
|
||||
log = logging.getLogger("cis490.receiver")
|
||||
SUFFIX = ".tar.zst"
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
def _bearer_check(request, expected):
|
||||
if expected is None:
|
||||
return None
|
||||
auth = request.headers.get("authorization", "")
|
||||
if not auth.startswith("Bearer "):
|
||||
return JSONResponse({"error": "missing bearer token"}, status_code=401)
|
||||
presented = auth[len("Bearer "):]
|
||||
if not secrets.compare_digest(presented, expected):
|
||||
return JSONResponse({"error": "bad bearer token"}, status_code=401)
|
||||
return None
|
||||
`;
|
||||
|
||||
const PY_KEYWORDS = new Set([
|
||||
|
|
@ -978,42 +994,119 @@ log = logging.getLogger("cis490.receiver")
|
|||
}).join('\n');
|
||||
}
|
||||
|
||||
const TRAINER = `"""Train PhaseLSTM on the windowed dataset.
|
||||
const TRAINER = `"""Long Short-Term Memory over channel × time windows.
|
||||
|
||||
Each window is 10 s of /proc telemetry (100 samples × 12 channels)
|
||||
labeled with the phase that occupies its center. The LSTM reads the
|
||||
window timestep-by-timestep and predicts a single phase.
|
||||
|
||||
Held-out *samples* — not held-out time slices — are the bar that
|
||||
matters. Generalization to malware the model has never seen is the
|
||||
whole reason this dataset exists.
|
||||
"""
|
||||
Same input/output as GRU, swap the cell. ~30% more parameters than
|
||||
the GRU at the same hidden size; included so the comparison report
|
||||
can speak to the cell-choice question."""
|
||||
from __future__ import annotations
|
||||
from torch import nn
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import DataLoader
|
||||
from training.models import register
|
||||
from training.models._torch_seq import _SeqBase
|
||||
|
||||
from training.data.windows import WindowedEpisodes
|
||||
from training.models.lstm import PhaseLSTM
|
||||
|
||||
ds = WindowedEpisodes("train", window_s=10, hz=10)
|
||||
loader = DataLoader(ds, batch_size=128, shuffle=True)
|
||||
model = PhaseLSTM(channels=12, hidden=64, num_phases=5).cuda()
|
||||
optim = torch.optim.AdamW(model.parameters(), lr=3e-4)
|
||||
loss_fn = nn.CrossEntropyLoss()
|
||||
@register("lstm")
|
||||
class LSTM(_SeqBase):
|
||||
def _build_module(self, *, n_channels_in, n_timesteps,
|
||||
n_classes, hidden=128, n_layers=2,
|
||||
dropout=0.1, bidirectional=False):
|
||||
return _LSTMClassifier(
|
||||
n_channels_in=n_channels_in, n_classes=n_classes,
|
||||
hidden=hidden, n_layers=n_layers,
|
||||
dropout=dropout, bidirectional=bidirectional,
|
||||
)
|
||||
|
||||
for epoch in range(20):
|
||||
for x, y in loader:
|
||||
loss = loss_fn(model(x.cuda()), y.cuda())
|
||||
optim.zero_grad()
|
||||
loss.backward()
|
||||
optim.step()
|
||||
|
||||
class _LSTMClassifier(nn.Module):
|
||||
def __init__(self, *, n_channels_in, n_classes, hidden,
|
||||
n_layers, dropout, bidirectional):
|
||||
super().__init__()
|
||||
self.lstm = nn.LSTM(
|
||||
input_size=n_channels_in, hidden_size=hidden,
|
||||
num_layers=n_layers,
|
||||
dropout=dropout if n_layers > 1 else 0.0,
|
||||
batch_first=True, bidirectional=bidirectional,
|
||||
)
|
||||
d_out = hidden * (2 if bidirectional else 1)
|
||||
self.head = nn.Sequential(
|
||||
nn.Dropout(dropout),
|
||||
nn.Linear(d_out, n_classes),
|
||||
)
|
||||
|
||||
def forward(self, x): # (B, C, T) -> (B, T, C)
|
||||
x = x.transpose(1, 2)
|
||||
out, _ = self.lstm(x)
|
||||
return self.head(out[:, -1, :]) # last-step classification
|
||||
`;
|
||||
|
||||
document.getElementById('code-pyproject').innerHTML = highlightToml(PYPROJECT);
|
||||
document.getElementById('code-receiver').innerHTML = highlightPython(RECEIVER);
|
||||
document.getElementById('code-train-lstm').innerHTML = highlightPython(TRAINER);
|
||||
const TRAIN_LOOP = `def train_nn(*, model, X_train, y_train, X_val, y_val,
|
||||
n_classes, epochs=60, batch_size=512,
|
||||
base_lr=1e-3, weight_decay=1e-4,
|
||||
warmup_frac=0.05, grad_clip=1.0,
|
||||
patience=8, device="auto") -> TrainResult:
|
||||
"""Train a model; return TrainResult with the best-on-val
|
||||
state_dict already loaded back into model.module."""
|
||||
if device == "auto":
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
use_amp = device == "cuda"
|
||||
mod = model.module.to(device)
|
||||
|
||||
# Inverse-frequency class weights (capped) — clean dominates
|
||||
# the dataset, so unweighted CE just learns "everything is fine."
|
||||
cw = _compute_class_weights(y_train, n_classes)
|
||||
loss_fn = nn.CrossEntropyLoss(
|
||||
weight=torch.from_numpy(cw).to(device))
|
||||
|
||||
opt = torch.optim.AdamW(mod.parameters(), lr=base_lr,
|
||||
weight_decay=weight_decay)
|
||||
scaler = torch.amp.GradScaler("cuda") if use_amp else None
|
||||
|
||||
best_f1, best_state, no_improve = -1.0, None, 0
|
||||
step, total_steps = 0, epochs * len(train_dl)
|
||||
warmup = int(total_steps * warmup_frac)
|
||||
|
||||
for ep in range(1, epochs + 1):
|
||||
mod.train()
|
||||
for xb, yb in train_dl:
|
||||
xb, yb = xb.to(device), yb.to(device)
|
||||
# Cosine LR with linear warmup
|
||||
for g in opt.param_groups:
|
||||
g["lr"] = _cosine_lr(step,
|
||||
total_steps=total_steps,
|
||||
warmup_steps=warmup, base_lr=base_lr)
|
||||
opt.zero_grad(set_to_none=True)
|
||||
if use_amp:
|
||||
with torch.amp.autocast("cuda"):
|
||||
loss = loss_fn(mod(xb), yb)
|
||||
scaler.scale(loss).backward()
|
||||
scaler.unscale_(opt)
|
||||
nn.utils.clip_grad_norm_(mod.parameters(), grad_clip)
|
||||
scaler.step(opt); scaler.update()
|
||||
else:
|
||||
loss = loss_fn(mod(xb), yb)
|
||||
loss.backward()
|
||||
nn.utils.clip_grad_norm_(mod.parameters(), grad_clip)
|
||||
opt.step()
|
||||
step += 1
|
||||
|
||||
# Macro-F1 on val (not accuracy: classes are imbalanced)
|
||||
f1 = _macro_f1(y_val, _predict(mod, val_dl), n_classes)
|
||||
if f1 > best_f1 + 1e-4:
|
||||
best_f1, best_state, no_improve = f1, mod.state_dict(), 0
|
||||
else:
|
||||
no_improve += 1
|
||||
if no_improve >= patience:
|
||||
break # early stop
|
||||
|
||||
mod.load_state_dict(best_state)
|
||||
return TrainResult(best_f1=best_f1, best_state=best_state, ...)
|
||||
`;
|
||||
|
||||
document.getElementById('code-pyproject').innerHTML = highlightToml(PYPROJECT);
|
||||
document.getElementById('code-receiver').innerHTML = highlightPython(RECEIVER);
|
||||
document.getElementById('code-train-lstm').innerHTML = highlightPython(TRAINER);
|
||||
document.getElementById('code-train-loop').innerHTML = highlightPython(TRAIN_LOOP);
|
||||
})();
|
||||
|
||||
// ── Ingest counter + 60-second sparkline ──────────────────────
|
||||
|
|
|
|||
|
|
@ -286,9 +286,15 @@
|
|||
<div class="stage-view" data-view="training-code">
|
||||
<div class="metric-stack metric-stack-wide">
|
||||
<div class="metric-eyebrow">how we trained the sequence models</div>
|
||||
<div class="code-card">
|
||||
<div class="code-card-header">training/models/lstm.py</div>
|
||||
<pre class="code" id="code-train-lstm"></pre>
|
||||
<div class="code-grid">
|
||||
<div class="code-card">
|
||||
<div class="code-card-header">training/models/lstm.py</div>
|
||||
<pre class="code" id="code-train-lstm"></pre>
|
||||
</div>
|
||||
<div class="code-card">
|
||||
<div class="code-card-header">training/trainer/_loop.py · train_nn</div>
|
||||
<pre class="code" id="code-train-loop"></pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -570,6 +576,6 @@
|
|||
</article>
|
||||
</div>
|
||||
|
||||
<script src="/static/dashboard.js?v=061aec1c"></script>
|
||||
<script src="/static/dashboard.js?v=15fac426"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue