# CIS490/training/eval_/breakdown.py

"""Per-profile and per-host metric breakdown.

A model with macro F1 = 0.55 might be 0.85 on five profiles and 0.10
on the sixth. The single number hides exactly the kind of failure mode
this project cares about (one malware family the model can't see).
This module produces the breakdown table.
"""
from __future__ import annotations

from dataclasses import asdict, dataclass

import numpy as np

from training.eval_._metrics import _f1, _macro_f1, bootstrap_macro_f1


@dataclass
class CellMetrics:
    """Metrics for one cell (a single profile or host) of the breakdown."""

    # Number of samples that fall in this cell.
    n: int
    # Point estimate of macro F1 as reported by ``bootstrap_macro_f1``.
    macro_f1: float
    # Lower / upper bounds of the bootstrap interval around ``macro_f1``.
    # NOTE(review): the interval's confidence level is set inside
    # ``bootstrap_macro_f1`` and is not visible here — confirm there.
    macro_f1_lo: float
    macro_f1_hi: float
    # F1 for each class index in ``range(n_classes)``, keyed by class index.
    per_class_f1: dict[int, float]

def _breakdown_by_group(
    y_true: np.ndarray, y_pred: np.ndarray,
    groups: list[str], n_classes: int, n_resamples: int,
) -> dict[str, CellMetrics]:
    """Compute one ``CellMetrics`` per distinct, truthy label in *groups*.

    Shared implementation behind ``by_profile`` and ``by_host``.

    Args:
        y_true: Ground-truth class indices, one per sample.
        y_pred: Predicted class indices, aligned with ``y_true``.
        groups: Group label for each sample, aligned with ``y_true``.
            Falsy labels (e.g. empty strings) are skipped.
        n_classes: Number of classes; per-class F1 is reported for every
            index in ``range(n_classes)``.
        n_resamples: Forwarded to ``bootstrap_macro_f1``.

    Returns:
        Mapping from group label to its metrics, inserted in sorted
        label order.
    """
    labels = np.asarray(groups)
    out: dict[str, CellMetrics] = {}
    for label in sorted({g for g in labels if g}):
        mask = labels == label
        # Defensive: a label that does not compare equal to itself (e.g. a
        # float NaN in a non-string array) selects nothing; skip it rather
        # than score an empty cell.
        if not mask.any():
            continue
        # Slice once per group instead of once per metric call.
        yt, yp = y_true[mask], y_pred[mask]
        ci = bootstrap_macro_f1(yt, yp, n_classes, n_resamples=n_resamples)
        per_class = {k: _f1(yt, yp, k) for k in range(n_classes)}
        out[label] = CellMetrics(
            n=int(mask.sum()), macro_f1=ci.point,
            macro_f1_lo=ci.low, macro_f1_hi=ci.high,
            per_class_f1=per_class,
        )
    return out


def by_profile(
    *,
    y_true: np.ndarray, y_pred: np.ndarray,
    profiles: list[str], n_classes: int,
    n_resamples: int = 500,
) -> dict[str, CellMetrics]:
    """One row per profile observed in test.

    Args:
        y_true: Ground-truth class indices, one per sample.
        y_pred: Predicted class indices, aligned with ``y_true``.
        profiles: Profile name for each sample; falsy entries are ignored.
        n_classes: Number of classes to report per-class F1 for.
        n_resamples: Bootstrap resample count passed to
            ``bootstrap_macro_f1``.

    Returns:
        Mapping from profile name to its ``CellMetrics``, in sorted
        profile order.
    """
    return _breakdown_by_group(y_true, y_pred, profiles, n_classes,
                               n_resamples)


def by_host(
    *,
    y_true: np.ndarray, y_pred: np.ndarray,
    hosts: list[str], n_classes: int,
    n_resamples: int = 500,
) -> dict[str, CellMetrics]:
    """One row per host observed in test.

    Args:
        y_true: Ground-truth class indices, one per sample.
        y_pred: Predicted class indices, aligned with ``y_true``.
        hosts: Host name for each sample; falsy entries are ignored.
        n_classes: Number of classes to report per-class F1 for.
        n_resamples: Bootstrap resample count passed to
            ``bootstrap_macro_f1``.

    Returns:
        Mapping from host name to its ``CellMetrics``, in sorted host
        order.
    """
    return _breakdown_by_group(y_true, y_pred, hosts, n_classes,
                               n_resamples)