# CIS490/tests/test_manifest.py

"""Tests for orchestrator/manifest.py — the canonical experiment
manifest loader (PIPELINE.md §4.1)."""

from __future__ import annotations

from pathlib import Path

import pytest

from orchestrator.manifest import (
    KNOWN_COLLECTORS, KNOWN_PHASES, ManifestError, load_canonical,
)


# Directory two levels above this test file; test_canonical_manifest_in_repo_loads
# passes it to load_canonical to load the manifest shipped with the repository.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Baseline manifest body (TOML) that the tests write to manifest.toml.
# test_loads_minimal_valid expects it to load as-is; the negative tests derive
# their inputs from it with exact-text str.replace calls, so editing the
# spelling or line layout below can silently break those replacements.
MINIMAL_VALID = """
schema_version = 1
name = "test-experiment"

[experiment]
ram_per_vm_mib = 320

[[experiment.schedule.phases]]
name = "clean"
seconds = 1.0

[[experiment.schedule.phases]]
name = "armed"
seconds = 1.0

[experiment.fleet]
max_concurrent_ceiling = 0
max_tier3_slots = 0

[collectors]
active = ["proc"]

[collectors.intervals]
proc_ms = 100
qmp_ms = 1000
perf_ms = 100
guest_agent_ms = 100
pcap_snaplen = 256
netflow_bucket_ms = 100

[catalog]
modules = []

[targets]
images = []

[samples]
manifest_path = "samples/manifest.toml"
"""


def _write_manifest(repo: Path, body: str) -> None:
    """Write *body* to ``manifest.toml`` directly inside *repo*.

    The text is encoded as UTF-8 explicitly instead of with the platform's
    locale encoding: TOML documents are UTF-8 by specification, so the
    fixture stays well-formed regardless of the host's locale settings.
    An existing ``manifest.toml`` in *repo* is overwritten.

    Args:
        repo: Existing directory that stands in for the repository root.
        body: Full TOML text of the manifest to write.
    """
    (repo / "manifest.toml").write_text(body, encoding="utf-8")


def test_canonical_manifest_in_repo_loads() -> None:
    """Load the manifest.toml at the repository root and sanity-check it.

    This guards the shipped manifest itself: if it fails to load or
    validate, every lab host fails preflight.
    """
    manifest = load_canonical(REPO_ROOT)
    assert manifest.schema_version == 1
    assert manifest.name == "cis490-spectral-v1"

    # No active collector may fall outside KNOWN_COLLECTORS.
    unknown_collectors = [
        collector
        for collector in manifest.collectors_active
        if collector not in KNOWN_COLLECTORS
    ]
    assert not unknown_collectors

    # Likewise, every scheduled phase must carry a name from KNOWN_PHASES.
    unknown_phases = [
        phase.name for phase in manifest.schedule
        if phase.name not in KNOWN_PHASES
    ]
    assert not unknown_phases


def test_loads_minimal_valid(tmp_path: Path) -> None:
    """The unmodified MINIMAL_VALID fixture loads and exposes its values."""
    _write_manifest(tmp_path, MINIMAL_VALID)
    loaded = load_canonical(tmp_path)

    assert loaded.name == "test-experiment"
    # The fixture declares two phases: "clean" and "armed".
    assert len(loaded.schedule) == 2
    assert loaded.fleet.max_concurrent_ceiling == 0
    # The active collector list is compared against a tuple.
    assert loaded.collectors_active == ("proc",)


def test_missing_file_raises_manifest_error(tmp_path: Path) -> None:
    """A directory without manifest.toml yields a "not found" ManifestError."""
    # Nothing is written to tmp_path here, so there is no manifest to load.
    expect_not_found = pytest.raises(ManifestError, match="not found")
    with expect_not_found:
        load_canonical(tmp_path)


def test_unsupported_schema_version_raises(tmp_path: Path) -> None:
    """Bumping schema_version to 2 is rejected, and the error names it."""
    body = MINIMAL_VALID.replace("schema_version = 1", "schema_version = 2")
    _write_manifest(tmp_path, body)
    with pytest.raises(ManifestError, match="schema_version=2"):
        load_canonical(tmp_path)


def test_unknown_collector_in_active_raises(tmp_path: Path) -> None:
    """An unrecognised name in collectors.active is rejected.

    The error message is expected to mention the offending collector name.
    """
    fixture_line = 'active = ["proc"]'
    tampered_line = 'active = ["proc", "totally_not_real"]'
    _write_manifest(tmp_path, MINIMAL_VALID.replace(fixture_line, tampered_line))
    with pytest.raises(ManifestError, match="totally_not_real"):
        load_canonical(tmp_path)


def test_duplicate_collector_in_active_raises(tmp_path: Path) -> None:
    """Listing the same collector twice in collectors.active is rejected."""
    fixture_line = 'active = ["proc"]'
    doubled_line = 'active = ["proc", "proc"]'
    _write_manifest(tmp_path, MINIMAL_VALID.replace(fixture_line, doubled_line))
    with pytest.raises(ManifestError, match="duplicate"):
        load_canonical(tmp_path)


def test_unknown_phase_name_raises(tmp_path: Path) -> None:
    """A phase named "magical" is rejected, and the error names it."""
    # count=1 limits the rewrite to the first occurrence of the "armed" line.
    body = MINIMAL_VALID.replace('name = "armed"', 'name = "magical"', 1)
    _write_manifest(tmp_path, body)
    expect_bad_phase = pytest.raises(ManifestError, match="magical")
    with expect_bad_phase:
        load_canonical(tmp_path)


def test_negative_phase_seconds_raises(tmp_path: Path) -> None:
    """A phase with ``seconds = -5.0`` is rejected with "must be > 0"."""
    # Anchor on the end of the "clean" phase plus the "armed" header so that
    # only the second phase's duration is rewritten.
    armed_prefix = (
        'seconds = 1.0\n\n[[experiment.schedule.phases]]\nname = "armed"\n'
    )
    body = MINIMAL_VALID.replace(
        armed_prefix + "seconds = 1.0",
        armed_prefix + "seconds = -5.0",
    )
    _write_manifest(tmp_path, body)
    with pytest.raises(ManifestError, match="must be > 0"):
        load_canonical(tmp_path)


def test_negative_ram_raises(tmp_path: Path) -> None:
    """Setting ram_per_vm_mib to -1 is rejected with a "positive" error."""
    _write_manifest(
        tmp_path,
        MINIMAL_VALID.replace("ram_per_vm_mib = 320", "ram_per_vm_mib = -1"),
    )
    with pytest.raises(ManifestError, match="positive"):
        load_canonical(tmp_path)


def test_catalog_entry_missing_verified_against_raises(tmp_path: Path) -> None:
    """A catalog module without ``verified_against`` is rejected.

    The injected entry has ``name`` and ``last_verified`` only; the error
    message is expected to mention the missing field.
    """
    incomplete_catalog = (
        '[catalog]\n[[catalog.modules]]\nname = "fixture"\nlast_verified = "abc"\n'
    )
    body = MINIMAL_VALID.replace("[catalog]\nmodules = []", incomplete_catalog)
    _write_manifest(tmp_path, body)
    with pytest.raises(ManifestError, match="verified_against"):
        load_canonical(tmp_path)


def test_catalog_entry_with_both_fields_loads(tmp_path: Path) -> None:
    """A catalog module with both verification fields loads intact."""
    complete_catalog = (
        '[catalog]\n[[catalog.modules]]\nname = "fixture"\n'
        'verified_against = "test-target"\nlast_verified = "abc"\n'
    )
    body = MINIMAL_VALID.replace("[catalog]\nmodules = []", complete_catalog)
    _write_manifest(tmp_path, body)

    catalog = load_canonical(tmp_path).catalog
    assert len(catalog) == 1
    # The single entry must carry exactly the values written above.
    entry = catalog[0]
    assert entry.name == "fixture"
    assert entry.verified_against == "test-target"
    assert entry.last_verified == "abc"


def test_to_meta_round_trips_to_json_safe(tmp_path: Path) -> None:
    """``to_meta()`` must return something ``json.dumps`` can encode.

    The dict is embedded in meta.json, so it is serialized and parsed back
    here and two of its fields are spot-checked.
    """
    import json
    _write_manifest(tmp_path, MINIMAL_VALID)
    meta = load_canonical(tmp_path).to_meta()
    round_tripped = json.loads(json.dumps(meta))
    assert round_tripped["schema_version"] == 1
    assert round_tripped["name"] == "test-experiment"