CIS490/tests/test_containment.py

"""§4.13 containment regression tests.

Every shell script that boots a target VM (build.sh, verify.sh,
launch_target.sh) must hold the containment posture:
  * SLIRP `restrict=on` — no upstream egress, OR an explicit
    operator-declared bridge with no internet route. (Both shellshock
    build and verify use restrict=on; build temporarily allows egress
    for package fetch but verify confirms-with-restrict.)
  * NO `-virtfs`, `-fsdev`, `-9pfs`, or any host-shared mount.
  * `snapshot=on` on the boot drive so verification doesn't mutate
    the artifact.
  * No `sudo` / setuid wrapper around `qemu-system-*` invocation.

These are static checks against the shell scripts in the repo —
they catch a regression at PR time, before any image gets built.
A complementary runtime check would have to actually boot the VM,
which is too heavy for CI; the spec.toml validator gives us the
runtime declaration that the verifier must satisfy.
"""

from __future__ import annotations

import re
from pathlib import Path

import pytest


# Directory two levels above this file (the parent of the directory that
# holds this test module).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory scanned for per-target sub-directories by the checks below.
TARGETS_DIR = REPO_ROOT.joinpath("vm", "targets")


def _strip_inline_comment(line: str) -> str:
    """Return *line* with a trailing, unquoted ``# comment`` removed.

    A ``#`` only starts a comment when it directly follows an unquoted,
    unescaped space or tab. A ``#`` inside single or double quotes,
    after a backslash, or glued to the preceding character (``$#``,
    ``${#var}``) is left alone. The blank immediately before the ``#``
    is dropped together with the comment.

    Quote state is tracked within this one line only; strings that span
    several lines and here-documents are not recognised.
    """
    quote = ""           # active quote character, "" while unquoted
    escaped = False      # previous character was an escaping backslash
    after_blank = False  # previous character was an unquoted blank
    for pos, ch in enumerate(line):
        if escaped:
            # This character is escaped, so it can neither open a quote
            # nor act as the blank that introduces a comment.
            escaped = False
            after_blank = False
        elif quote == "'":
            # Inside single quotes everything is literal, backslash too.
            if ch == "'":
                quote = ""
        elif ch == "\\":
            escaped = True
            after_blank = False
        elif quote == '"':
            if ch == '"':
                quote = ""
        elif ch in "'\"":
            quote = ch
            after_blank = False
        elif ch == "#" and after_blank:
            return line[: pos - 1]
        else:
            after_blank = ch in " \t"
    return line


def _strip_bash_comments(src: str) -> str:
    """Remove shell comments from *src* so the containment scans don't
    trip on documentation that merely mentions a forbidden flag.

    Lines whose first non-blank character is ``#`` are dropped entirely.
    On the remaining lines a trailing ``# ...`` comment is cut off, but
    only when the ``#`` sits outside quotes: cutting inside a quoted
    string would also discard the real code after it on the same line,
    and with it any forbidden flag that code contains.

    Args:
        src: Full text of a shell script.

    Returns:
        The remaining lines joined with ``"\\n"`` (no trailing newline).
    """
    kept = []
    for line in src.splitlines():
        if line.lstrip().startswith("#"):
            continue
        kept.append(_strip_inline_comment(line))
    return "\n".join(kept)


def _targets_with_script(script_name: str) -> list[tuple[str, Path]]:
    """List the target directories under ``TARGETS_DIR`` that contain
    a file named *script_name*.

    Entries are visited in sorted path order so the result does not
    depend on the order the filesystem happens to return from
    ``iterdir()``.

    Returns:
        ``(directory name, path to the script)`` tuples; an empty list
        when ``TARGETS_DIR`` does not exist.
    """
    if not TARGETS_DIR.exists():
        return []
    found = []
    for child in sorted(TARGETS_DIR.iterdir()):
        script = child / script_name
        if child.is_dir() and script.exists():
            found.append((child.name, script))
    return found


def _every_target_with_verify():
    """Return ``(target name, verify.sh path)`` for every target
    directory that has a ``verify.sh``."""
    return _targets_with_script("verify.sh")


def _every_target_with_build():
    """Return ``(target name, build.sh path)`` for every target
    directory that has a ``build.sh``."""
    return _targets_with_script("build.sh")


@pytest.mark.parametrize(
    "name,path",
    _every_target_with_verify() or [pytest.param("none", None, marks=pytest.mark.skip(reason="no targets yet"))],
)
def test_verify_uses_restrict_on(name, path: Path) -> None:
    """Every verify.sh MUST boot the produced image with no upstream
    egress. The verifier proves the image satisfies its spec under the
    SAME containment posture the orchestrator will use at episode time
    — a target that needs internet access during verification can't be
    trusted to behave under §4.13.

    The check is static: it takes the text from the first
    ``qemu-system-x86_64`` mention up to the next line that starts with
    an upper-case letter or ``#`` (or end of file) and requires
    ``restrict=on`` to appear in it outside of shell comments.
    """
    src = path.read_text()
    qemu_inv = re.search(r"qemu-system-x86_64\b[^`]*?(?=\n[A-Z#]|\Z)", src,
                         re.DOTALL)
    assert qemu_inv, f"{name}: no qemu-system-x86_64 invocation in verify.sh"
    qemu_text = qemu_inv.group(0)
    # Only real arguments count: a `restrict=on` that survives solely in
    # a comment inside the span must not satisfy the check.
    effective_text = _strip_bash_comments(qemu_text)
    assert "restrict=on" in effective_text, (
        f"{name}: verify.sh qemu invocation must include "
        f"`restrict=on` on its netdev (§4.13). Got:\n{qemu_text}"
    )


@pytest.mark.parametrize(
    "name,path",
    _every_target_with_verify()
    or [
        pytest.param(
            "none",
            None,
            marks=pytest.mark.skip(reason="no targets yet"),
        )
    ],
)
def test_verify_no_shared_filesystem(name, path: Path) -> None:
    """No host-shared mount may appear in a verify.sh.

    The script text is scanned, with shell comments removed, for each
    host-shared-filesystem marker; any occurrence fails the test.
    """
    script_text = _strip_bash_comments(path.read_text())
    banned_markers = ("-virtfs", "-fsdev", "9pfs", "9p,trans")
    for forbidden in banned_markers:
        is_present = forbidden in script_text
        assert not is_present, (
            f"{name}: verify.sh contains `{forbidden}` — host-shared "
            f"filesystem is a §4.13 containment regression."
        )


@pytest.mark.parametrize(
    "name,path",
    _every_target_with_verify() or [pytest.param("none", None, marks=pytest.mark.skip(reason="no targets yet"))],
)
def test_verify_uses_snapshot_on(name, path: Path) -> None:
    """`snapshot=on` so verification doesn't mutate the build
    artifact's bytes — the sha256 must be stable from build to
    publish to dataset-time use.

    Only the first single-line ``-drive ... file=... .qcow2`` argument
    found outside shell comments is inspected.
    """
    # Scan comment-stripped text: a commented-out `-drive` line must not
    # be mistaken for the boot drive, and a trailing `# snapshot=on`
    # remark must not satisfy the check.
    src = _strip_bash_comments(path.read_text())
    # Search for the boot drive line; allow flexible spacing/quoting.
    drive_match = re.search(r'-drive[^\n]*file=[^\n]*\.qcow2[^\n]*', src)
    if drive_match is None:
        # NOTE(review): a script with no literal `.qcow2` drive argument
        # (e.g. the image path held in a variable) passes vacuously —
        # confirm that is intended.
        return
    assert "snapshot=on" in drive_match.group(0), (
        f"{name}: verify.sh boot drive must use snapshot=on (§4.13)"
    )


@pytest.mark.parametrize(
    "name,path",
    _every_target_with_verify() or [pytest.param("none", None, marks=pytest.mark.skip(reason="no targets yet"))],
)
def test_verify_no_sudo_qemu(name, path: Path) -> None:
    """QEMU MUST run as the unprivileged caller. No sudo / setuid
    wrappers (§4.13).

    Flags ``sudo`` followed, on the same logical line, by any number of
    dash-options and then a word containing ``qemu-system``. This is a
    superset of the literal ``sudo qemu-system``: it also catches
    ``sudo -E qemu-system-...``, repeated blanks, an absolute path to
    the binary, and a backslash-continued line. Comments are scanned
    too, so the check never gets weaker than the plain substring test.
    """
    # Join backslash-continued lines so `sudo \<newline>qemu-system...`
    # is seen as one command.
    src = path.read_text().replace("\\\n", " ")
    sudo_qemu = re.search(r"sudo(?:[ \t]+-\S+)*[ \t]+\S*qemu-system", src)
    assert sudo_qemu is None, (
        f"{name}: verify.sh wraps qemu-system in sudo — privileged "
        f"QEMU is a §4.13 containment regression."
    )


@pytest.mark.parametrize(
    "name,path",
    _every_target_with_build()
    or [
        pytest.param(
            "none",
            None,
            marks=pytest.mark.skip(reason="no targets yet"),
        )
    ],
)
def test_build_no_shared_filesystem(name, path: Path) -> None:
    """A build.sh must not set up a host-shared filesystem mount, even
    though the build phase is allowed egress for package fetch.

    The script text is scanned, with shell comments removed, for each
    host-shared-filesystem marker; any occurrence fails the test.
    """
    script_text = _strip_bash_comments(path.read_text())
    banned_markers = ("-virtfs", "-fsdev", "9pfs", "9p,trans")
    for forbidden in banned_markers:
        is_present = forbidden in script_text
        assert not is_present, (
            f"{name}: build.sh contains `{forbidden}` — host-shared "
            f"filesystem is a §4.13 containment regression."
        )


@pytest.mark.parametrize(
    "name,path",
    _every_target_with_build() or [pytest.param("none", None, marks=pytest.mark.skip(reason="no targets yet"))],
)
def test_build_no_sudo_qemu(name, path: Path) -> None:
    """A build.sh must not launch QEMU through sudo (§4.13).

    Flags ``sudo`` followed, on the same logical line, by any number of
    dash-options and then a word containing ``qemu-system``. This is a
    superset of the literal ``sudo qemu-system``: it also catches
    ``sudo -E qemu-system-...``, repeated blanks, an absolute path to
    the binary, and a backslash-continued line. Comments are scanned
    too, so the check never gets weaker than the plain substring test.
    """
    # Join backslash-continued lines so `sudo \<newline>qemu-system...`
    # is seen as one command.
    src = path.read_text().replace("\\\n", " ")
    sudo_qemu = re.search(r"sudo(?:[ \t]+-\S+)*[ \t]+\S*qemu-system", src)
    assert sudo_qemu is None, (
        f"{name}: build.sh wraps qemu-system in sudo (§4.13)"
    )


# ---------------------------------------------------------------------
# Cross-cutting: every target with a build.sh must also have verify.sh
# and vice versa. Half-built targets are a §1 violation.
# ---------------------------------------------------------------------


def test_every_target_has_complete_trio() -> None:
    """A target directory ships spec.toml, build.sh and verify.sh
    together, or none of them.

    Directories containing none of the three files are ignored. As soon
    as one of them is present the other two are required; the first
    directory found with a partial set fails the test. Passes trivially
    when the targets directory does not exist.
    """
    required = {"spec.toml", "build.sh", "verify.sh"}
    if not TARGETS_DIR.exists():
        return
    for child in TARGETS_DIR.iterdir():
        if not child.is_dir():
            continue
        files = set()
        for entry in child.iterdir():
            if entry.is_file():
                files.add(entry.name)
        # None of the trio present: not a target directory under this
        # rule, nothing to enforce.
        if required.isdisjoint(files):
            continue
        missing = required - files
        assert not missing, (
            f"target {child.name}: incomplete trio, missing {missing}. "
            f"§1 default-to-removal: complete the trio or remove the dir."
        )