diff --git a/references/links.md b/references/links.md index 14a2e5a..a7f393e 100644 --- a/references/links.md +++ b/references/links.md @@ -1,6 +1,10 @@ # Reference Links -- https://github.com/mitre/caldera?tab=security-ov-file -- https://github.com/PiyushxJangid/DLHIDS -- https://github.com/ArpanDFrank/Host-Intrusion-Detection-System-using-Hybrid-CNN-LSTM-Models-and-RL-Actor-Critic-Models +- https://github.com/mitre/caldera?tab=security-ov-file — adversary emulation framework (red-team/blue-team) +- https://github.com/PiyushxJangid/DLHIDS — deep-learning HIDS reference implementation +- https://github.com/ArpanDFrank/Host-Intrusion-Detection-System-using-Hybrid-CNN-LSTM-Models-and-RL-Actor-Critic-Models — Hybrid CNN-LSTM + RL HIDS prior art - https://ieeexplore.ieee.org/document/9881803 — per-device trust establishment from network behaviour (cited on motivation scene) +- https://pytorch.org/docs/stable/index.html — PyTorch reference (LSTM / GRU / CNN / Transformer module APIs used by the model zoo) +- https://xgboost.readthedocs.io/en/stable/ — XGBoost reference (gradient-boosted-trees baseline; Chen & Guestrin, KDD 2016) +- https://scikit-learn.org/stable/ — scikit-learn reference (KNN, KMeans, PCA, evaluation metrics) +- https://man7.org/linux/man-pages/man5/proc.5.html — proc(5) — the Linux kernel interface this project's telemetry comes from diff --git a/training/dashboard/static/dashboard.css b/training/dashboard/static/dashboard.css index 2b0a7f6..3bd22ab 100644 --- a/training/dashboard/static/dashboard.css +++ b/training/dashboard/static/dashboard.css @@ -1067,6 +1067,205 @@ html, body { overflow-anchor: none; } color: var(--fg-dim); } +/* ─── Problem statement (scene: problem-statement) ─────────────────── */ +.problem-claim { + padding: clamp(16px, 2vh, 28px) clamp(18px, 2vw, 28px); + background: var(--bg-elev, rgba(255, 255, 255, 0.03)); + border: 1px solid var(--line); + border-left: 4px solid var(--accent); + border-radius: 4px; +} 
+.problem-claim-text { + font-size: clamp(16px, 1.5vw, 22px); + line-height: 1.45; + color: var(--fg); + font-weight: 500; +} +.problem-stats { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: clamp(10px, 1.4vw, 18px); +} +.problem-stat { + padding: clamp(14px, 1.8vh, 22px); + background: var(--bg-elev, rgba(255, 255, 255, 0.03)); + border: 1px solid var(--line); + border-radius: 4px; + display: flex; flex-direction: column; gap: 4px; + align-items: flex-start; +} +.problem-stat-num { + font: 700 clamp(28px, 3.4vw, 44px) + ui-monospace, SFMono-Regular, Menlo, monospace; + color: var(--accent); + line-height: 1; +} +.problem-stat-lbl { + font-size: clamp(12px, 0.95vw, 14px); + color: var(--fg-dim); + line-height: 1.35; +} +.problem-task { + padding: 12px 16px; + background: var(--bg); + border: 1px solid var(--line); + border-radius: 4px; + font-size: clamp(13px, 1vw, 15px); + color: var(--fg-dim); + line-height: 1.5; +} +.problem-task-label { color: var(--fg-mute); margin-right: 6px; } +.problem-task-value { color: var(--fg); font-weight: 600; } +.problem-task-detail { color: var(--fg-dim); } + +/* ─── Research questions (scene: research-questions) ───────────────── */ +.research-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: clamp(12px, 1.6vw, 22px); +} +.research-col { + padding: clamp(14px, 1.8vh, 22px); + background: var(--bg-elev, rgba(255, 255, 255, 0.03)); + border: 1px solid var(--line); + border-radius: 4px; +} +.research-col-title { + font: 600 clamp(13px, 1.05vw, 15px) + ui-monospace, SFMono-Regular, Menlo, monospace; + color: var(--accent); + letter-spacing: 0.04em; + text-transform: uppercase; + margin-bottom: 12px; +} +.research-list { + list-style: none; padding: 0; margin: 0; + display: flex; flex-direction: column; gap: 10px; + font-size: clamp(13px, 1vw, 15px); + line-height: 1.45; + color: var(--fg-dim); +} +.research-list li::before { + content: '·'; color: var(--accent); margin-right: 8px; +} +.research-list 
strong { color: var(--fg); } + +/* ─── Solution overview (scene: solution-overview) ─────────────────── */ +.pipeline-svg { + width: 100%; + height: clamp(360px, 60vh, 640px); + background: var(--bg-elev, rgba(255, 255, 255, 0.03)); + border: 1px solid var(--line); + border-radius: 4px; + padding: 12px; + box-sizing: border-box; +} +.pipeline-stage rect { + fill: var(--bg); + stroke: var(--accent); + stroke-width: 1.5; +} +.pipeline-stage-models rect { + fill: var(--accent-soft, rgba(80, 140, 220, 0.08)); + stroke-width: 2; +} +.pipeline-stage-final rect { + stroke: var(--phase-clean); +} +.pipeline-stage text { + fill: var(--fg); + font: 600 14px ui-monospace, SFMono-Regular, Menlo, monospace; +} +.pipeline-stage-title { + font-size: 16px !important; +} +.pipeline-detail { + fill: var(--fg-dim) !important; + font-weight: 400 !important; + font-size: 11px !important; +} +.pipeline-detail-mini { + fill: var(--fg-mute) !important; + font-weight: 400 !important; + font-size: 10px !important; +} +.pipeline-arrow path { + stroke: var(--fg-mute); + stroke-width: 1.5; + stroke-linecap: round; + marker-end: url(#pipe-arrow); +} + +/* ─── Evaluation setup (scene: evaluation-setup) ───────────────────── */ +.eval-blocks { + display: grid; + grid-template-columns: 1fr 1fr; + gap: clamp(10px, 1.4vw, 18px); +} +.eval-block { + padding: clamp(12px, 1.6vh, 18px); + background: var(--bg-elev, rgba(255, 255, 255, 0.03)); + border: 1px solid var(--line); + border-radius: 4px; + display: flex; flex-direction: column; gap: 8px; +} +.eval-block-title { + font: 600 clamp(12px, 0.95vw, 14px) + ui-monospace, SFMono-Regular, Menlo, monospace; + color: var(--accent); + letter-spacing: 0.04em; + text-transform: uppercase; +} +.eval-block-body { + display: flex; flex-direction: column; gap: 6px; + font-size: clamp(13px, 1vw, 15px); + color: var(--fg-dim); + line-height: 1.45; +} +.eval-block-body strong { color: var(--fg); } +.eval-detail { + margin-top: 4px; + color: var(--fg-mute); + 
font-size: clamp(12px, 0.9vw, 13px); + font-style: italic; +} + +/* ─── Conclusion + future (scene: conclusion-future) ───────────────── */ +.conclusion-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: clamp(12px, 1.6vw, 22px); +} +.conclusion-col { + padding: clamp(14px, 1.8vh, 22px); + background: var(--bg-elev, rgba(255, 255, 255, 0.03)); + border: 1px solid var(--line); + border-radius: 4px; +} +.conclusion-col-title { + font: 600 clamp(13px, 1.05vw, 15px) + ui-monospace, SFMono-Regular, Menlo, monospace; + color: var(--accent); + letter-spacing: 0.04em; + text-transform: uppercase; + margin-bottom: 12px; +} +.conclusion-list { + list-style: none; padding: 0; margin: 0; + display: flex; flex-direction: column; gap: 10px; + font-size: clamp(13px, 1vw, 15px); + line-height: 1.45; + color: var(--fg-dim); +} +.conclusion-list li::before { + content: '·'; color: var(--accent); margin-right: 8px; +} +.conclusion-list strong { color: var(--fg); } + +/* ─── Limitations card uses the motivation-card pattern with an + armed-phase marker for the "warning" feel. ─── */ +.motivation-card-marker.mc-armed { background: var(--phase-armed); } + /* ─── Live detections (scene: live) ────────────────────────────────── */ .live-stack { gap: clamp(10px, 1.6vh, 20px); } diff --git a/training/dashboard/static/index.html b/training/dashboard/static/index.html index b5b356d..3e6abd0 100644 --- a/training/dashboard/static/index.html +++ b/training/dashboard/static/index.html @@ -4,7 +4,7 @@ CIS490 — live - + + +
+
+
the problem · single sentence + numbers
+
+
Classify each ten-second window of fleet + /proc telemetry into one of five workload phases — + accurately enough to drive automated containment.
+
+
+
+
5
+
phase classes
clean … infected_running
+
+
+
12
+
/proc channels
no syscalls, no kernel hooks
+
+
+
10s
+
classification window
100 samples × 12 channels
+
+
+
+ task type: + multi-class classification + — five mutually-exclusive + phase labels, balanced via class-weighted cross-entropy. + Not regression (no continuous target), not ranking + (downstream policy is a categorical containment decision). +
+
+
+ + +
+
+
literature gaps · positioning the work
+
+
+
what prior work covers
+
    +
  • LSTM on syscall traces in VMs — + deeper telemetry than /proc
  • +
  • Transformer on per-process resource metrics + — related signal, single-host eval
  • +
  • BERT on system logs (LogBERT) — + text-form telemetry, not numeric channels
  • +
  • Insider-threat LSTM on event logs + (DANTE) — categorical events, not continuous
  • +
  • Network-behaviour trust establishment + (IEEE 9881803) — cross-device aggregation, + not per-host classifier
  • +
+
+
+
what's missing
+
    +
  • /proc-only signal — most work + assumes syscalls or kernel hooks
  • +
  • Cross-host generalization — eval + splits often hide it (held-out by sample, not host)
  • +
  • Real-time per-window classification + for containment, not post-hoc batch labelling
  • +
  • Side-by-side cell-choice comparison + (RNN/GRU/LSTM/CNN/Transformer) on one dataset
  • +
  • Direct integration with a + fleet-wide trust score, not standalone output
  • +
+
+
+
+
+ + +
+
+
pipeline · what each stage produces
+ + + + fleet hosts + /proc · 10 Hz + + + + receiver (Pi) + bearer auth + + + + episode store + zstd · tar + + + + windowing + features + 10 s · 100 samples × 12 ch + + + + model zoo + KNN · GBT · MLP · CNN · RNN · GRU · LSTM · Transformer + trained per (model × split-recipe) + cross-host eval · class-weighted CE · early stop on val macro-F1 + + + + per-window phase + 5-class softmax + + + + trust score + + network signals (9881803) + + + + containment + reset + snapshot rollback + + + + + + + + + + + +
+
+ +
the stack behind the live data on the right
@@ -316,7 +452,60 @@
- + +
+
+
evaluation setup · how the numbers get made
+
+
+
split recipe
+
+
train ∪ val: elliott-thinkpad
+
test: k-gamingcom
+
held-out by host so the test set + measures cross-device generalization, not in-distribution + self-prediction. A 90 % accuracy that comes from + recognising the host's idle profile is worthless for + a fleet detector.
+
+
+
+
primary metric
+
+
macro-F1 averaged across the five phases
+
accuracy lies under class + imbalance — ~50 % infected_running, + ~5 % armed. A constant majority predictor + hits 0.5 accuracy. macro-F1 averages per-class F1, + so rare phases actually count toward the score.
+
+
+
+
baselines compared
+
+
KNN — non-parametric, instance-based
+
GBT (XGBoost) — tabular non-NN
+
MLP — feedforward ablation
+
CNN — local-pattern ablation
+
RNN / GRU / LSTM — recurrent family
+
Transformer — attention
+
+
+
+
reported alongside accuracy
+
+
μs / window — inference cost at batch=64
+
cross-host gap — val − test macro-F1
+
latency translates to containment + lag; the gap is the honest measure of generalization. + Both are plotted on the perf scene.
+
+
+
+
+
+ +
sequence models · accuracy on held-out samples
@@ -386,7 +575,7 @@
- +
@@ -403,6 +592,228 @@
+ +
+
+
theoretical contributions · what's new methodologically
+
+
+
+
+
window-centre labelling
+
A 10-second + classification window is labelled by the phase that + occupies its centre, not by majority vote across the + window. Cleaner training signal at phase boundaries, + and avoids the spurious "ambiguous" class.
+
+
+
+
+
+
schema-hashed checkpoints
+
Each checkpoint + embeds a hash of the feature schema; loading a model + against the wrong schema fails fast instead of + silently scoring on misaligned columns. Makes + retroactive comparison reproducible.
+
+
+
+
+
+
cross-host as the eval axis
+
Held-out-by-host + is reported as a first-class number alongside + held-out-by-sample. The two often disagree by 0.4 + macro-F1, and only the cross-host number predicts + fleet behaviour.
+
+
+
+
+
+ + +
+
+
practical contributions · what others can use
+
+
+
+
+
/proc-only deployment
+
No syscall hooks, no + eBPF, no kernel module — runs on hosts that don't + permit deep instrumentation. The detector is one + Python service plus a model file.
+
+
+
+
+
+
producer-agnostic dashboard
+
The deck consumes + typed events; the inference loop runs anywhere + (Pi, A100, cloud) and just POSTs back. Same UI for + a lab demo and an operational console.
+
+
+
+
+
+
labelled dataset on disk
+
78,000+ episodes, + five phases, two hosts, six attack profiles — + archived in zstd-compressed tarballs with a + schema-versioned format. Ready for downstream + work without re-running the orchestrator.
+
+
+
+
+
+ + +
+
+
design principles · patterns that emerged
+
+
+
+
+
one loop, many models
+
Every NN architecture + plugs into the same training loop — class weights, + AMP, cosine LR, early stop. Architecture changes + don't ripple into orchestration.
+
+
+
+
+
+
typed events as contract
+
Producers and + consumers agree on dataclasses + (events.py), not free-form dicts. + Adding a new scene means adding a new dataclass; + adding a new producer means importing it.
+
+
+
+
+
+
two-agent path ownership
+
Dashboard work and + model work live in two parallel sessions with a + documented path-ownership boundary. Merges go + through git with explicit rebases instead of a + shared workspace.
+
+
+
+
+
+ + +
+
+
limitations · the honest list
+
+
+
+
+
two-host fleet
+
Cross-host generalization + is reported between exactly two machines + (elliott-thinkpad → k-gamingcom). N-host claims need + more hosts on the WireGuard mesh.
+
+
+
+
+
+
synthetic attack profiles
+
Six profiles cover the + main shapes (cpu-saturate, ransomware-lite, bursty-c2, + fork-bomb, crypto-miner, distccd-exec) but real-world + malware can sit between or outside these envelopes.
+
+
+
+
+
+
10 Hz sampling floor
+
Sub-100ms attack + behaviours fall inside a single sample. Detection of + extremely short-lived attacks (millisecond-scale + privilege checks) requires faster sampling than + the 10 Hz /proc collector currently provides.
+
+
+
+
+
+
KNN cross-host gap
+
KNN scores val + macro-F1 ≈ 0.74 on elliott-thinkpad but only 0.13 on + the held-out k-gamingcom. Instance-based memorization + of the training host's feature space — informative + as a baseline, but not a deployment candidate.
+
+
+
+
+
+ + +
+
+
conclusion + future work
+
+
+
what we showed
+
    +
  • A per-host detector trained on + /proc-only telemetry can classify + workload phases at multi-class macro-F1 well above + chance.
  • +
  • Held-out-by-host evaluation is the + right generalization axis; held-out-by-sample + overstates real fleet performance by 0.3+ F1.
  • +
  • The recurrent family (LSTM/GRU) and Transformer + sit on the upper-left of the + accuracy-vs-cost frontier; KNN and + GBT round out the comparison as honest baselines.
  • +
  • The detector slots into a wider trust / + containment / recovery loop — the per-host + verdict isn't the final answer, it's one input.
  • +
+
+
+
next steps · unsupervised
+
    +
  • Clustering the unlabeled tail of + new fleet data (KMeans / HDBSCAN) to surface novel + workload shapes the supervised model has no class + for — a self-training feedback loop.
  • +
  • Anomaly detection on the + last-layer embedding (one-class SVM, isolation forest) + so a "none of the five known phases" verdict is + available alongside the classifier output.
  • +
  • Self-supervised pretraining on + the much larger pool of unlabeled telemetry from + operational hosts; supervised fine-tune on the + smaller orchestrated dataset.
  • +
Embedding visualisation via + UMAP / t-SNE for human-in-the-loop labelling of + the unlabeled tail (already prototyped in the KNN scene's 3-D scatter).
  • +
+
+
+
+
+
@@ -453,6 +864,79 @@ +
+
+

Problem statement

+

Today's behaviour-based IDS systems rely on syscall traces, + kernel hooks, or rich endpoint agents that can't ship to + constrained or untrusted hosts. We want a detector that + runs on the only telemetry every modern Linux already + exports — /proc — and labels each ten-second + window of activity with the phase the workload is in.

+

Research question. Can a sequence model + trained on twelve channels of /proc telemetry + classify five workload phases (clean / armed / infecting / + infected_running / dormant) accurately enough to drive + automated containment, and generalize across hosts + and malware profiles it has never seen during training?

+

The task is multi-class classification: + the target is one of five mutually-exclusive phase labels. + Not regression (no continuous target), not ranking + (downstream policy is a categorical containment decision). + We deliberately chose 10-second windows so detection + latency stays bounded for a real fleet.

+
+
+ +
+
+

Research gaps + questions

+

Literature on behaviour-based malware detection is rich but + uneven. Most published results either (a) use richer + telemetry than what a constrained host actually exports, or + (b) frame evaluation in ways that hide the cross-host + generalization problem. The card on the left summarises the + gap.

+

This project asks three concrete questions:

+

RQ1. How well can a per-window classifier + identify workload phases from /proc alone, with + no syscall traces and no kernel hooks?

+

RQ2. Does the model still work when test + episodes come from a host the training set never saw?

+

RQ3. Of the standard sequence-model + families (RNN, GRU, LSTM, CNN, Transformer) plus a + non-parametric baseline (KNN) and a tabular baseline + (gradient-boosted trees), which trade off accuracy and + inference cost best for a deployment that has to run on a + constrained host?

+
+
+ +
+
+

Proposed solution

+

A single end-to-end pipeline turns raw /proc + telemetry on a fleet host into a per-window phase verdict + in under a second. Each stage of the diagram on the left + is a thin, independently-deployable component — the + receiver doesn't know what model is running; the model + doesn't know where the episode came from.

+

The model zoo is the key abstraction: + every model class registers itself by name, declares its + input kind (summary features or window tensors), and plugs + into one shared training loop. KNN, GBT, MLP, CNN, RNN, + GRU, LSTM, and Transformer all reuse the same standardization, + schema-hashed checkpoint format, class-weighted CE loss, + and held-out-by-host evaluation — so the comparison is + genuinely apples-to-apples.

+

The detector's per-window verdict feeds two downstream + loops: a fleet-wide trust score that + combines local classification with network-behaviour + signals (per IEEE 9881803), and a fast-recovery + snapshot rollback when an infection time is known.

+
+
+

Live, not staged

@@ -553,6 +1037,31 @@
+
+
+

Evaluation setup

+

Three choices anchor every result on the next slides — the + split recipe, the primary metric, and what we measure next + to accuracy. The temptation is to report a single big + number; we report a number you can argue with.

+

Held-out by host. Train and validate on + one machine; test on a different machine. A model that + wins by memorising the train host's idle profile loses + here, which is what you want — a fleet detector has to + generalize across hosts it never saw at training time.

+

Macro-F1, not accuracy. The dataset is + heavily skewed: roughly half the labelled time is + infected_running and only ~5 % is + armed. A "predict the majority class" + baseline already hits 0.5 accuracy. Macro-F1 averages F1 + across all five phases so rare classes count.

+

Latency reported with accuracy. A model + that's one F1 point better but ten milliseconds slower + may still be the wrong choice for an on-host detector. + The perf scene plots both axes so the trade-off is visible.

+
+
+

Sequence models

@@ -632,6 +1141,141 @@
+
+
+

Theoretical contributions

+

Three methodological claims this project makes — small in + isolation, but together they change how the comparison is + run. Each shows up explicitly in the codebase.

+

Window-centre labelling. Instead of + majority-voting phase labels across each 10-second window + (which creates noisy boundaries), we label each window by + the phase that occupies its centre. Cleaner training + signal at transitions, no spurious "ambiguous" class.

+

Schema-hashed checkpoints. Every + checkpoint embeds a hash of the feature schema it was + trained on. Loading a model against a different schema + fails fast. Without this, retroactive comparison silently + scores models on misaligned columns and reports nonsense.

+

Cross-host as the eval axis. + Held-out-by-host is reported as a first-class number + alongside held-out-by-sample — the two often disagree by + ~0.4 macro-F1, and only the cross-host number predicts + real fleet behaviour.

+
+
+ +
+
+

Practical contributions

+

What others can pick up and use from this project — beyond + the published numbers.

+

/proc-only deployment. The detector needs + no syscall hooks, no eBPF, no kernel module. It runs on + hosts that don't permit deeper instrumentation — a small + VM, a container with limited capabilities, an embedded + device. One Python service plus a model file.

+

Producer-agnostic dashboard. The deck + consumes typed events + (training/dashboard/events.py); the inference + loop runs anywhere — Pi, A100, cloud — and just POSTs back. + Same UI for a lab demo and an operational console.

+

Labelled dataset on disk. 78 000+ + episodes across two hosts and six attack profiles, archived + in zstd-compressed tarballs with a schema-versioned format. + Anyone reproducing or extending this work can start from + the dataset directly without re-running the orchestrator.

+
+
+ +
+
+

Design principles

+

Three patterns that emerged during the project and earned + their keep enough that we'd repeat them.

+

One loop, many models. Every NN + architecture plugs into the same training loop — class + weights, AMP autocast, cosine LR with warmup, gradient + clipping, early stop on val macro-F1. Architecture changes + don't ripple into orchestration, and adding a new model + class costs ~80 lines.

+

Typed events as contract. Producers and + consumers agree on dataclasses, not free-form dicts. + Adding a new dashboard scene means adding a new dataclass; + adding a new producer means importing it. Static checking + and editor autocomplete do most of the work that a + schema-validation library would do at runtime.

+

Two-agent path ownership. Dashboard work + and model work live in two parallel sessions with a + documented path-ownership boundary + (training/dashboard/ vs everywhere else). + Merges go through git with explicit rebases instead of a + shared workspace — slow up front, fewer subtle stomps + over time.

+
+
+ +
+
+

Limitations

+

What this project cannot honestly claim — and why each + line on the left matters for how the results should be read.

+

Two-host fleet. Cross-host generalization + is reported between exactly two machines; it's the right + shape of evaluation but not a population claim. + More hosts on the WireGuard mesh would let us report + distributional bounds rather than single point comparisons.

+

Synthetic attack profiles. Our six + profiles cover the main behavioural envelopes + (cpu-saturate, ransomware-lite, bursty-c2, fork-bomb, + crypto-miner, distccd-exec) but real-world malware can + sit between or outside these envelopes. Generalization to + unseen profiles is reported via held-out-by-sample, but + in-the-wild distribution shift is unknown.

+

10 Hz sampling floor. Sub-100ms + behaviours fall inside a single sample. Detection of + millisecond-scale privilege checks would need faster + telemetry than our 10 Hz /proc polling provides.

+

KNN cross-host gap. KNN scores val + macro-F1 ≈ 0.74 on the train host but only ≈ 0.13 on the + held-out one. Instance-based memorization of the training + host's feature space — informative as a baseline, not a + deployment candidate.

+
+
+ +
+
+

Conclusion + future work

+

A per-host classifier trained on /proc-only + telemetry can identify workload phases at multi-class + macro-F1 well above chance and slot into a wider + trust / containment / recovery loop. The recurrent family + (LSTM/GRU) and Transformer sit on the upper-left of the + accuracy-vs-cost frontier; KNN and GBT are honest baselines. + Held-out-by-host evaluation is the right generalization + axis — held-out-by-sample overstates real fleet + performance by 0.3+ F1.

+

Unsupervised next steps. The natural + extensions all start from unlabeled telemetry:

+

Clustering the unlabeled tail of new + fleet data (KMeans / HDBSCAN) to surface novel workload + shapes the supervised model has no class for — a + self-training feedback loop that enrolls new phases as + the fleet grows.

+

Anomaly detection on the last-layer + embedding (one-class SVM, isolation forest) so a "none of + the five known phases" verdict is available alongside the + classifier output.

+

Self-supervised pretraining on the much + larger pool of unlabeled telemetry from operational hosts; + supervised fine-tune on the smaller orchestrated dataset.

+

Embedding visualisation via UMAP / + t-SNE for human-in-the-loop labelling — already prototyped + in the KNN scene's interactive 3-D scatter.

+
+
+

References