# Sample manifest — what each fleet slot picks from.
#
# Each entry has three things:
#   - identity    (name, family, category) for labeling
#   - acquisition (source, sha256, url) for reproducibility
#   - behaviour   (profile) so the synthetic load mimic can run a
#                 reasonable proxy until the real sample lands at
#                 samples/store/.
#
# NOTE(review): none of the entries seen in this review set source,
# sha256 or url — confirm whether acquisition data is resolved
# elsewhere (e.g. by tools/auto_fetch_samples.py) or still needs to
# be added to each entry.
#
# When the real malware binary is present at samples/store/,
# the orchestrator runs THAT inside the guest. When it's absent, the
# orchestrator falls back to the mimic workload with the matching
# profile so the fleet still produces *labeled, varied* data while
# we collect the real samples. Either way, meta.json records which
# path the episode took, so trainers can filter on
# meta.sample.kind ∈ {real, mimic}.
#
# Families below are CHOSEN AND TESTED to match theZoo entries that
# contain a Linux 32-bit Intel 80386 ELF binary — i.e. binaries that
# will execute natively inside our Metasploitable2 (Ubuntu 8.04 i386)
# target VM. Verified against a fresh theZoo clone on 2026-05-01;
# tools/auto_fetch_samples.py prefers the Linux-i386 ELF in each
# multi-binary zip via `_is_linux_i386_elf` magic-byte sniffing.

[[sample]]
name = "linux-encoder-ransomware"
family = "Linux.Encoder"
category = "ransomware"
profile = "io-walk"
description = "Linux.Encoder.1 (Linux i386 ELF). The first known Linux ransomware. Heavy disk write + fs walk producing a per-file overwrite envelope."

[[sample]]
name = "linux-wirenet-rat"
family = "Linux.Wirenet"
category = "rat"
profile = "shell-resident"
description = "Linux.Wirenet (Linux i386 ELF). RAT with a long-lived TCP socket pinned to a fixed peer; occasional command bursts."

[[sample]]
name = "linux-rex-ransomware"
family = "Ransomware.Rex"
category = "ransomware"
profile = "io-walk"
description = "Ransomware.Rex (Linux i386 ELF, written in Go). File-walk encryption envelope with periodic CPU spikes during AES."

[[sample]]
name = "linux-neurevt-bot"
family = "Neurevt"
category = "botnet"
profile = "scan-and-dial"
description = "Neurevt 1.7 (Linux i386 ELF). Botnet panel binary; SYN scans + periodic dial-home pattern."

[[sample]]
name = "linux-earthkrahang-apt"
family = "EarthKrahang"
category = "rat"
profile = "bursty-c2"
description = "EarthKrahang 2024 (Linux i386 ELF). APT backdoor; long idle + periodic small TCP egress bursts."

# Mimic-only fallback families. theZoo doesn't have a clean Linux i386
# binary for these; auto_fetch_samples.py logs a warning and the
# orchestrator stays on the mimic workload until a real binary is
# staged manually at samples/store/. Kept here so the trainer
# can still collect cpu-saturate and low-and-slow envelopes (those
# profiles' theZoo coverage is sparse).

[[sample]]
name = "xmrig-cryptominer"
family = "XMRig"
category = "cryptominer"
profile = "cpu-saturate"
description = "Mimic only on Metasploitable2 (no Linux-i386 XMRig in theZoo)."

[[sample]]
name = "kovter-class-stealth"
family = "Kovter"
category = "fileless"
profile = "low-and-slow"
description = "Mimic only — Kovter is Windows-native; theZoo's binary won't run on Metasploitable2 i386."