CIS490/training/dashboard/static/index.html

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>CIS490 — live</title>
  <!-- The ?v=… query string looks like a cache-busting content hash;
       presumably it changes whenever dashboard.css changes (confirm
       against whatever serves or builds this page). -->
  <link rel="stylesheet" href="/static/dashboard.css?v=3434f117">
</head>
<body>
  <!-- SVG filter defs for the lava-lamp goo effect. Width/height 0
       so it doesn't take layout space; the filter is referenced by
       CSS via filter: url(#goo). -->
  <svg class="goo-defs" width="0" height="0" aria-hidden="true">
    <defs>
      <filter id="goo">
        <!-- Step 1: blur the source graphic (stdDeviation 22) so that
             neighboring shapes bleed into each other. -->
        <feGaussianBlur in="SourceGraphic" stdDeviation="22" result="blur"/>
        <!-- Step 2: the first three matrix rows are identity, so R, G
             and B pass through unchanged; the last row remaps alpha to
             26·a − 12, which pushes the soft blurred edge back towards
             fully opaque or fully transparent. -->
        <feColorMatrix in="blur" mode="matrix" result="goo" values="
          1 0 0 0 0
          0 1 0 0 0
          0 0 1 0 0
          0 0 0 26 -12"/>
        <!-- Step 3: blend the original graphic with the thresholded
             result (no mode attribute, so feBlend's default applies). -->
        <feBlend in="SourceGraphic" in2="goo"/>
      </filter>
    </defs>
  </svg>

  <!-- Theme background layers — exactly one is visible at a time,
       selected by body[data-theme]. The blobs / bubbles / beams
       inside drift / lava / laser are generated by JS so the count
       and statistical-distribution sliders actually take effect.

       Layers in markup order: bg-tint, bg-drift, bg-lava (whose
       goo-container is the element that receives the bubbles),
       bg-vaporwave (the only layer with static child markup) and
       bg-laser. The whole canvas is aria-hidden because it is purely
       decorative. The layer names line up with the option values of
       the #theme-bg select; its "black" option has no layer of its
       own here. -->
  <div class="bg-canvas" id="bg-canvas" aria-hidden="true">
    <div class="bg-tint"></div>

    <div class="bg-drift" id="bg-drift"></div>

    <div class="bg-lava">
      <div class="goo-container" id="bg-lava-bubbles"></div>
    </div>

    <div class="bg-vaporwave">
      <div class="vw-sky"></div>
      <!-- Scanlines BEFORE sun: the sun's solid disc occludes
           scanlines inside its area so they can't beat against the
           sun's venetian-blind stripes (the same kind of moiré
           that previously appeared between scanlines and the
           perspective floor — same shape, smaller scale). -->
      <div class="vw-scanlines"></div>
      <div class="vw-sun"><div class="vw-sun-blinds"></div></div>
      <div class="vw-horizon"></div>
      <div class="vw-floor"><div class="vw-floor-grid"></div></div>
    </div>

    <div class="bg-laser" id="bg-laser-beams"></div>
  </div>

  <!-- Right-half sidebar theme panel. Slides in/out via the
       `is-open` class — we don't use the `hidden` attribute because
       the transform animation needs the panel to stay rendered. -->
  <div class="theme-panel" id="theme-panel">
    <!-- Panel header: title plus the close control. The close button is
         icon-only, so aria-label supplies its accessible name (the "×"
         glyph on its own is announced as a multiplication sign), and the
         explicit type keeps it a plain push button wherever it ends up. -->
    <div class="theme-panel-header">
      <span class="theme-title">theme · OKLCH</span>
      <button id="theme-close" class="ghost icon" type="button" title="Close (t)" aria-label="Close theme panel">×</button>
    </div>

    <!-- Background picker. Every option value except "black" matches
         one of the layer names inside #bg-canvas (bg-drift, bg-lava,
         bg-vaporwave, bg-laser). -->
    <label class="theme-row">
      <span>background</span>
      <select id="theme-bg">
        <option value="black">black (still)</option>
        <option value="drift">drift (soft blobs)</option>
        <option value="lava">lava lamp (goo metaballs)</option>
        <option value="vaporwave">vaporwave</option>
        <option value="laser">laser show</option>
      </select>
    </label>

    <!-- Base color: a wheel widget next to the three OKLCH sliders.
         Throughout this panel, each span whose id ends in "-val" shows
         the same number as the initial value attribute of the slider
         beside it; presumably dashboard.js keeps the two in sync as the
         slider moves (confirm in the script). -->
    <div class="theme-wheel-block">
      <div class="theme-wheel" id="theme-wheel">
        <div class="wheel-disc"></div>
        <div class="wheel-rim"></div>
        <div class="wheel-markers" id="wheel-markers"></div>
      </div>
      <div class="theme-sliders">
        <label>L · lightness · <span id="theme-l-val">70</span>%
          <input type="range" id="theme-l" min="20" max="95" value="70" step="1"></label>
        <label>C · chroma · <span id="theme-c-val">0.15</span>
          <input type="range" id="theme-c" min="0" max="0.4" value="0.15" step="0.005"></label>
        <label>H · hue · <span id="theme-h-val">250</span>°
          <input type="range" id="theme-h" min="0" max="360" value="250" step="1"></label>
      </div>
    </div>

    <!-- Palette harmony: how many colors the palette holds and the
         angular range they are spread over. #theme-harmony-hint is
         empty in the markup. -->
    <div class="theme-sliders theme-harmony-block">
      <label>colors · count · <span id="theme-count-val">3</span>
        <input type="range" id="theme-count" min="1" max="6" value="3" step="1"></label>
      <label>spread · angular range · <span id="theme-spread-val">60</span>°
        <input type="range" id="theme-spread" min="0" max="300" value="60" step="1"></label>
      <div class="theme-harmony-hint" id="theme-harmony-hint"></div>
    </div>

    <!-- Per-color lightness / chroma ladder. Collapsed by default
         (this details element has no open attribute); both variances
         start at zero. -->
    <details class="theme-advanced">
      <summary>advanced — palette ladder</summary>
      <div class="theme-sliders">
        <label>L variance · per-color lightness ladder · <span id="theme-lvar-val">0</span>
          <input type="range" id="theme-lvar" min="0" max="40" value="0" step="1"></label>
        <label>C variance · per-color chroma ladder · <span id="theme-cvar-val">0.00</span>
          <input type="range" id="theme-cvar" min="0" max="0.15" value="0" step="0.005"></label>
      </div>
    </details>

    <!-- Palette preview. #theme-swatches is empty in the markup;
         presumably the script renders one swatch per palette color
         (confirm in dashboard.js). -->
    <div class="theme-row">
      <span>palette</span>
      <div class="theme-swatches" id="theme-swatches"></div>
    </div>

    <!-- Animation settings labelled "global" in the summary. Expanded
         by default via the open attribute. -->
    <details class="theme-advanced" open>
      <summary>animation · global</summary>
      <div class="theme-sliders">
        <label>speed · <span id="theme-speed-val">1.00</span>×
          <input type="range" id="theme-speed" min="0.1" max="4" value="1" step="0.05"></label>
        <label>blur · <span id="theme-blur-val">0</span> px
          <input type="range" id="theme-blur" min="0" max="40" value="0" step="1"></label>
        <label>tint strength · <span id="theme-tint-val">0.10</span>
          <input type="range" id="theme-tint" min="0" max="0.6" value="0.1" step="0.02"></label>
        <label>content backdrop · <span id="theme-backdrop-val">0.30</span>
          <input type="range" id="theme-backdrop" min="0" max="1" value="0.3" step="0.05"></label>
      </div>
    </details>

    <!-- Per-theme settings — dynamically built by JS from the THEMES
         spec; only the section matching state.background is shown. -->
    <div id="theme-bg-settings"></div>

    <!-- Footer row: a read-out of the current color plus the reset
         control. The initial oklch(…) text matches the default L / C / H
         slider values (70, 0.15, 250). The reset button carries an
         explicit type so it is always a plain push button and can never
         act as a form submit or form reset control. -->
    <div class="theme-meta-row">
      <code id="theme-meta">oklch(70% 0.15 250)</code>
      <button id="theme-reset" class="ghost" type="button">reset</button>
    </div>
  </div>

  <!-- Top bar: brand, connection status, scene counter and the deck
       controls. Every button has an explicit type="button". The two
       icon-only arrows also get an aria-label, because a bare "◀" / "▶"
       glyph is not a usable accessible name. The text buttons
       (click-nav, demo, theme) deliberately have no aria-label: their
       visible text is their name, and an aria-label would mask any
       text the script writes into them later. -->
  <header class="topbar">
    <span class="brand">CIS490</span>
    <span id="status" class="status">connecting…</span>
    <span class="spacer"></span>
    <span class="counter"><span id="scene-idx">1</span> / <span id="scene-total">1</span></span>
    <button id="prev-btn" class="ghost icon" type="button" title="Previous (← / k)" aria-label="Previous slide">◀</button>
    <button id="next-btn" class="ghost icon" type="button" title="Next (→ / space / j)" aria-label="Next slide">▶</button>
    <button id="click-nav-btn" class="ghost" type="button" title="Click on the stage to advance to the next slide (c)">click-nav: off</button>
    <button id="demo-btn" class="ghost" type="button" title="Toggle local synthetic data">demo: off</button>
    <button id="theme-btn" class="ghost" type="button" title="Theme panel (t)">theme</button>
  </header>

  <div class="layout">
    <div class="canvas-wrapper" id="stage-col">
      <div class="stage">

        <!-- 1. intro — title card: a grid backdrop behind an eyebrow line
             and a title that the br elements break into one word per
             line. Its data-view key matches the data-stage of the first
             article scene; presumably dashboard.js uses that key to
             decide which view is shown (confirm in the script). -->
        <div class="stage-view" data-view="intro">
          <div class="bg-grid"></div>
          <div class="intro-block">
            <div class="intro-eyebrow">cis490 · live fleet telemetry</div>
            <div class="intro-title">behavioral<br>malware<br>detection</div>
          </div>
        </div>

        <!-- 2. stack — Python stack & libraries used in the project.
             Two code cards side by side in a grid. Both pre elements
             are empty in the markup; presumably dashboard.js fills
             #code-pyproject and #code-receiver at runtime (confirm in
             the script). -->
        <div class="stage-view" data-view="stack">
          <div class="metric-stack metric-stack-wide">
            <div class="metric-eyebrow">the stack behind the live data on the right</div>
            <div class="code-grid">
              <div class="code-card">
                <div class="code-card-header">pyproject.toml</div>
                <pre class="code" id="code-pyproject"></pre>
              </div>
              <div class="code-card">
                <div class="code-card-header">receiver/app.py · file header</div>
                <pre class="code" id="code-receiver"></pre>
              </div>
            </div>
          </div>
        </div>

        <!-- 3. collect — ingest counter, rate and bytes-on-disk read-outs
             above a sparkline. The numbers in the markup (0, 0.0, 0 B)
             are placeholders; presumably the script overwrites them
             with live values. The sparkline holds two empty paths, one
             named as the fill and one as the line, drawn in a 600×120
             viewBox that preserveAspectRatio="none" lets stretch to
             whatever box CSS gives it. -->
        <div class="stage-view" data-view="collect">
          <div class="metric-stack">
            <div class="metric-eyebrow">episodes ingested</div>
            <div class="metric-big" id="ingest-total">0</div>
            <div class="metric-sub">
              <span id="ingest-rate">0.0</span> / sec · last 60 s ·
              total bytes on disk: <span id="ingest-bytes">0 B</span>
            </div>
            <svg class="sparkline" id="ingest-spark" viewBox="0 0 600 120" preserveAspectRatio="none">
              <path id="ingest-spark-fill" d=""></path>
              <path id="ingest-spark-path" d=""></path>
            </svg>
          </div>
        </div>

        <!-- 4. hosts — per-host bars. #host-bars starts out holding only
             the "awaiting snapshot…" placeholder; presumably the script
             replaces it once the first snapshot arrives (confirm in
             dashboard.js). -->
        <div class="stage-view" data-view="hosts">
          <div class="metric-stack">
            <div class="metric-eyebrow">per-host shipping</div>
            <div class="bars" id="host-bars">
              <div class="awaiting">awaiting snapshot…</div>
            </div>
          </div>
        </div>

        <!-- 5. db — episode database explorer: host tabs and a text
             filter above a four-column table, plus a detail pane that
             stays hidden (via the hidden attribute) until something
             reveals it. #db-tabs, #db-tbody and the detail children are
             empty in the markup and presumably filled by the script.

             Accessibility notes: the filter input has an aria-label
             because a placeholder is not a label (it disappears on
             input and is not reliably announced), and every header
             cell declares scope="col" so assistive technology can tie
             data cells to their column. -->
        <div class="stage-view" data-view="db">
          <div class="metric-stack metric-stack-wide">
            <div class="db-header">
              <div class="metric-eyebrow">episode database · last 200 records</div>
              <div class="db-count" id="db-count">0 of 0</div>
            </div>
            <div class="db-controls">
              <div class="db-tabs" id="db-tabs"></div>
              <input class="db-search" id="db-search" type="text"
                     placeholder="filter by host / id / sha…"
                     aria-label="Filter episodes by host, episode id, or SHA">
            </div>
            <div class="db-table-wrap">
              <table class="db-table">
                <thead>
                  <tr>
                    <th scope="col">host</th>
                    <th scope="col">episode_id</th>
                    <th scope="col">received</th>
                    <th scope="col">size</th>
                  </tr>
                </thead>
                <tbody id="db-tbody"></tbody>
              </table>
            </div>
            <div class="db-detail" id="db-detail" hidden>
              <div class="db-detail-meta" id="db-detail-meta"></div>
              <div class="db-detail-chart-wrap">
                <svg class="db-detail-chart" id="db-detail-chart"
                     viewBox="0 0 1000 360" preserveAspectRatio="none"></svg>
              </div>
              <div class="db-detail-legend" id="db-detail-legend"></div>
            </div>
          </div>
        </div>

        <!-- 6. baseline — phase-mix stacked bar with its legend. The
             stack and legend containers are empty in the markup. The
             eyebrow and the sub-text both carry ids, so presumably the
             script rewrites these "sampling" / "computing" placeholder
             strings once the phase distribution is available (confirm
             in dashboard.js). -->
        <div class="stage-view" data-view="baseline">
          <div class="metric-stack">
            <div class="metric-eyebrow" id="phase-mix-eyebrow">phase mix · sampling dataset…</div>
            <div class="phase-stack" id="phase-stack"></div>
            <div class="phase-legend" id="phase-legend"></div>
            <div class="metric-sub" id="phase-mix-sub">computing the phase
              distribution across a random sample of episodes on disk.
              A clean fleet sits mostly in <code>clean</code>; skew toward
              <code>infecting</code> / <code>infected_running</code>
              reflects time spent under attack workloads.</div>
          </div>
        </div>

        <!-- 7. attacks — one thumbnail per attack profile. #profile-grid
             is empty in the markup; presumably the script builds the
             thumbnails (confirm in dashboard.js). -->
        <div class="stage-view" data-view="attacks">
          <div class="metric-stack">
            <div class="metric-eyebrow">attack envelopes · /proc signature per profile</div>
            <div class="profile-grid" id="profile-grid"></div>
          </div>
        </div>

        <!-- 8. chunking — illustration of the 10-second windowing. The
             rule, row and axis containers are empty in the markup and
             presumably drawn by the script; only the caption below
             them is static text. -->
        <div class="stage-view" data-view="chunking">
          <div class="metric-stack">
            <div class="metric-eyebrow">10-second windows · model input shape</div>
            <div class="chunk-rule" id="chunk-rule"></div>
            <div class="chunk-row" id="chunk-row"></div>
            <div class="chunk-axis" id="chunk-axis"></div>
            <div class="metric-sub">each window: 100 samples (10 Hz × 10 s),
              labeled by the phase that occupies its center.</div>
          </div>
        </div>

        <!-- 9. models — accuracy bars for the sequence models.
             #model-bars is empty in the markup; presumably the script
             renders one bar per model (confirm in dashboard.js). -->
        <div class="stage-view" data-view="models">
          <div class="metric-stack">
            <div class="metric-eyebrow">sequence models · accuracy on held-out samples</div>
            <div class="model-bars" id="model-bars"></div>
          </div>
        </div>

        <!-- 10. training-code — how we trained the sequence models.
             A single code card headed training/models/lstm.py; its pre
             element is empty in the markup and presumably filled by
             the script (confirm in dashboard.js). -->
        <div class="stage-view" data-view="training-code">
          <div class="metric-stack metric-stack-wide">
            <div class="metric-eyebrow">how we trained the sequence models</div>
            <div class="code-card">
              <div class="code-card-header">training/models/lstm.py</div>
              <pre class="code" id="code-train-lstm"></pre>
            </div>
          </div>
        </div>

        <!-- 11. knn — interactive 3-D scatter with mode toggle. Three
             mode buttons (the "phase" one starts with the active
             class) and a reset-view button sit above the canvas; the
             legend container below it is empty in the markup. Every
             button declares type="button" so it is always a plain push
             button, never an implicit submit. -->
        <div class="stage-view" data-view="knn">
          <div class="metric-stack">
            <div class="metric-eyebrow">window features · 3-D projection · drag to rotate</div>
            <div class="scatter3d-controls">
              <div class="scatter3d-modes">
                <button class="scatter3d-mode active" type="button" data-mode="phase">phase (ground truth)</button>
                <button class="scatter3d-mode" type="button" data-mode="predicted">KNN-predicted label</button>
                <button class="scatter3d-mode" type="button" data-mode="cluster">cluster id</button>
              </div>
              <button class="scatter3d-reset" type="button">reset view</button>
            </div>
            <div class="scatter3d-wrap">
              <canvas class="scatter3d" id="knn-scatter-canvas"></canvas>
            </div>
            <div class="phase-legend" id="knn-legend"></div>
          </div>
        </div>

        <!-- 13. references — PDF viewer with tabs + description.
             Numbered 13 to match the article, where the references
             scene comes last, even though this view sits before
             "12. perf" in the markup. The tab strip and description
             are empty here, and the iframe has no src; presumably the
             script sets it when a tab is picked (confirm in
             dashboard.js).

             NOTE(review): the sandbox list combines allow-same-origin
             with allow-scripts. For a document served from this same
             origin that combination lets the framed page reach the
             parent and remove the sandbox attribute, so it provides
             little isolation. Confirm whether the viewer really needs
             both tokens before relying on the sandbox for safety. -->
        <div class="stage-view" data-view="references">
          <div class="metric-stack metric-stack-wide ref-stack">
            <div class="metric-eyebrow">references · papers, notes, prior work</div>
            <div class="ref-tabs" id="ref-tabs"></div>
            <div class="ref-content">
              <div class="ref-viewer-wrap">
                <iframe class="ref-viewer" id="ref-viewer"
                        title="reference viewer"
                        sandbox="allow-same-origin allow-scripts allow-popups allow-forms"></iframe>
              </div>
              <div class="ref-description" id="ref-description"></div>
            </div>
          </div>
        </div>

        <!-- 12. perf — accuracy-versus-cost scatter. The svg is empty in
             the markup and presumably drawn by the script. Unlike the
             sparkline and detail chart, this viewBox (600×360) keeps
             its aspect ratio: xMidYMid meet scales it uniformly and
             centers it. The caption spells out both axes. -->
        <div class="stage-view" data-view="perf">
          <div class="metric-stack">
            <div class="metric-eyebrow">accuracy vs inference cost</div>
            <svg class="scatter" id="perf-scatter" viewBox="0 0 600 360" preserveAspectRatio="xMidYMid meet"></svg>
            <div class="metric-sub">x: μs / window (lower is better) ·
              y: held-out accuracy (higher is better).</div>
          </div>
        </div>

      </div>
      <!-- Floating "next" button. It is icon-only, so aria-label gives
           it an accessible name (the "▼" glyph alone is not one), and
           type="button" keeps it a plain push button. data-no-advance
           is kept as-is; presumably it exempts the button from the
           click-to-advance handler (confirm in dashboard.js). -->
      <button id="next-fab" class="fab" type="button" data-no-advance title="Next (→)" aria-label="Next slide">▼</button>
    </div>

    <article class="article">

      <section class="scene" data-stage="intro">
        <div class="prose">
          <p class="lede">Most malware doesn't look like malware in a database
            — it looks like a process behaving badly.</p>
          <p>An <strong>intrusion detection system</strong> spots the bad
            behavior; an <strong>intrusion prevention system</strong> stops it.
            Both depend on knowing what bad behavior <em>looks like</em> at the
            level of telemetry the device can actually see.</p>
          <p>This deck is the live face of the dataset we're building to teach
            a model that distinction — every panel on the left is a slice of
            real data shipping in right now.</p>
          <p class="hint">scroll, click, or → to advance</p>
        </div>
      </section>

      <section class="scene" data-stage="stack">
        <div class="prose">
          <h2>Live, not staged</h2>
          <p>Every panel from here on is real data from real devices —
            counters, bars, the episode database, all driven by the
            <code>cis490-receiver</code> service running on this Pi as
            you scroll.</p>
          <p>The code on the left is how it gets here. Four runtime deps:
            <strong>starlette</strong> + <strong>uvicorn</strong> for the
            async HTTP and WebSocket surface, <strong>msgpack</strong>
            talks to Metasploit's RPC, <strong>pycdlib</strong> builds the
            lab-VM cidata ISOs. Everything else is the standard library,
            and every dep is annotated with a one-line reason it's there.</p>
        </div>
      </section>

      <section class="scene" data-stage="collect">
        <div class="prose">
          <h2>Collecting the dataset</h2>
          <p>Each lab host on the WireGuard mesh boots a real Alpine VM, runs
            a profile-driven workload inside it, and samples
            <code>/proc/&lt;qemu_pid&gt;</code> at 10&nbsp;Hz. Every ~30&nbsp;seconds
            the labeled tarball is shipped to this Pi over mTLS.</p>
          <p>The counter on the left is the running total, sourced from the
            receiver's <code>index.jsonl</code> on disk. The sparkline is the
            arrival rate over the last sixty seconds.</p>
        </div>
      </section>

      <section class="scene" data-stage="hosts">
        <div class="prose">
          <h2>A multi-host fleet</h2>
          <p>Running the same orchestrator on multiple hosts gives novel,
            non-overlapping data per host — no central coordinator. Each host
            pulls a different slice of the manifest, so the dataset grows in
            parallel.</p>
          <p>The numbers on the left are absolute episode counts on disk, refreshed
            from <code>/var/lib/cis490/episodes/&lt;host&gt;/</code> every
            thirty seconds.</p>
        </div>
      </section>

      <section class="scene" data-stage="db">
        <div class="prose">
          <h2>The dataset, browsable</h2>
          <p>Every row is one labeled episode tarball stored at
            <code>/var/lib/cis490/episodes/&lt;host&gt;/&lt;id&gt;.tar.zst</code>
            after the receiver verifies its SHA-256 and writes it through.</p>
          <p>Filter by host with the tabs, or grep by host / episode id /
            sha with the search box. Click a row for the full
            <code>index.jsonl</code> record. The view holds the most recent
            two hundred records — older history is on disk, indexable
            from the receiver.</p>
        </div>
      </section>

      <section class="scene" data-stage="baseline">
        <div class="prose">
          <h2>A baseline of normal</h2>
          <p>Before we can detect a deviation, we have to know what the fleet
            looks like when it's healthy. The stacked bar shows how a random
            sample of the episodes on disk divides across the phases
            — a healthy mix has plenty of <code>clean</code>.</p>
          <p>If the model only ever sees <code>clean</code>, it overfits to
            "everything is fine." The phase schedule fixes that by forcing the
            workload to walk through every phase on every run.</p>
        </div>
      </section>

      <section class="scene" data-stage="attacks">
        <div class="prose">
          <h2>Linking attack to telemetry</h2>
          <p>The same six profiles run across every host, and each one
            produces a different envelope in <code>/proc</code>. A
            cryptominer pegs one core for minutes. A bursty C2 channel sits
            idle, then exhales three packets. Ransomware walks the
            filesystem and saturates I/O.</p>
          <p>The thumbnails on the left are the canonical envelopes the
            model has to learn to recognize — same axes, different shapes.
            That shape difference is what makes detection tractable.</p>
        </div>
      </section>

      <section class="scene" data-stage="chunking">
        <div class="prose">
          <h2>Ten-second windows</h2>
          <p>Models eat fixed-size inputs. We chop each episode into
            10-second windows — 100 samples per window at 10&nbsp;Hz — and
            label each window with the phase that occupies its center.</p>
          <p>Window size is a knob. Too short and the model can't see slow
            envelopes (low-and-slow malware, idle C2). Too long and you can't
            react fast enough to be a useful prevention signal. Ten seconds
            is the starting point we tune around.</p>
        </div>
      </section>

      <section class="scene" data-stage="models">
        <div class="prose">
          <h2>Sequence models</h2>
          <p><strong>RNN, GRU, LSTM</strong> — recurrent models that read the
            window one timestep at a time and carry state forward. Cheap,
            mature, easy to interpret.</p>
          <p><strong>BERT-style transformer</strong> — the window becomes a
            sequence of "tokens"; attention captures cross-position context
            instead of accumulating it through a hidden state. More
            parameters, more compute, more room to overfit a small dataset.</p>
          <p>Same input, same labels, four different inductive biases. The
            comparison on the left is the punchline of the whole project.</p>
        </div>
      </section>

      <section class="scene" data-stage="training-code">
        <div class="prose">
          <h2>How we trained them</h2>
          <p>One trainer per model — load the windowed dataset, define the
            network, train, evaluate. Same shape for RNN, GRU, LSTM, BERT,
            so you can read all four side-by-side and the only differences
            are the architecture itself.</p>
          <p>The code on the left is the LSTM trainer.
            PyTorch's <code>DataLoader</code> handles windowing,
            <code>nn.LSTM</code> is one line, the loop is six.
            No custom loss, no learning-rate schedule, no manual batching —
            anything fancier has to earn its place by beating the simple
            version on held-out samples.</p>
        </div>
      </section>

      <section class="scene" data-stage="knn">
        <div class="prose">
          <h2>Nearest-neighbor as a sanity check</h2>
          <p>Before anything fancy: engineer summary features per window
            (mean, std, p95, slope, zero-bucket counts per channel) and run
            <strong>KNN</strong> in that feature space.</p>
          <p>If the phase clusters separate visibly in the 3-D projection, KNN
            already does most of the work and a deep model is only buying
            marginal improvement. If they don't separate, you've learned
            something about the feature engineering before training a single
            epoch.</p>
        </div>
      </section>

      <section class="scene" data-stage="perf">
        <div class="prose">
          <h2>Accuracy vs complexity</h2>
          <p>Bigger models earn better numbers in the validation set — but
            they also need more parameters, more inference time, and more
            memory at the edge. The deployed model has to fit on the device
            it's protecting.</p>
          <p>The scatter on the left is the usable trade-off curve: every
            point above and to the left of where you currently sit is a
            reachable upgrade. The point in the bottom-right is a model
            you'd never ship.</p>
        </div>
      </section>

      <section class="scene" data-stage="references">
        <div class="prose">
          <h2>References</h2>
          <p>The papers, notes, and prior work this project leans on.
            Pick a tab on the left to load the document; the viewer
            takes the bulk of the stage so you can scroll through
            without leaving the deck.</p>
          <p class="hint">end of deck · ← to flip back</p>
        </div>
      </section>

      <div class="scene-end-spacer"></div>
    </article>
  </div>

  <!-- Placed at the very end of body, so every element above has been
       parsed by the time this script runs. The ?v=… query string looks
       like a cache-busting hash (confirm against the serving code). -->
  <script src="/static/dashboard.js?v=a087e0a4"></script>
</body>
</html>