CIS490/receiver/version_gate.py

"""Live commit allow-list for the receiver.

The receiver only stores episodes whose `meta.json::code_version.commit`
matches a commit in the canonical repository's recent history. Two
backends are supported:

  forgejo: queries
    GET /api/v1/repos/<owner>/<name>/commits?sha=<branch>&limit=<n>
    on a Forgejo instance the maintainer pushes to. PRODUCTION
    DEFAULT — Forgejo is the authoritative source of truth that
    both lab hosts and the receiver pull from, so when the
    maintainer pushes new code the new commit becomes acceptable
    automatically.

  git: runs `git log -n <window> --format=%H` against a local
    checkout. Used by tests + dev-only setups where a Forgejo
    instance isn't available.

Cache TTL: 5s by default — push a commit, wait 5s, the new hash is
in the allow-list. No service restart.

Episodes from older code (before a known bug fix) get rejected with
HTTP 412 + a remediation block telling the lab-host operator to pull
main and re-run the install. That keeps bad data out of the index.
"""

from __future__ import annotations

import json
import logging
import subprocess
import threading
import time
import urllib.parse
import urllib.request
from pathlib import Path


log = logging.getLogger("cis490.receiver.version_gate")


class VersionGate:
    """Maintains the set of acceptable commit hashes via either a
    Forgejo HTTP API call or a local `git log`.

    Args:
      forgejo_url: e.g. "http://10.100.0.1:3000". Setting this enables
        the Forgejo backend; ``repo_owner``/``repo_name``/``branch``
        must also be set. ``auth_token`` is optional but recommended
        (so a private Forgejo doesn't need to be world-readable).
      repo_path: local checkout (fallback / test backend). Used iff
        ``forgejo_url`` is None.
      window: how many recent commits count as valid.
      cache_ttl_s: how long to trust the cache before refreshing.
    """

    def __init__(
        self,
        repo_path: Path | None = None,
        *,
        window: int = 100,
        cache_ttl_s: float = 5.0,
        forgejo_url: str | None = None,
        repo_owner: str | None = None,
        repo_name: str | None = None,
        branch: str = "main",
        auth_token: str | None = None,
    ) -> None:
        self.repo_path = Path(repo_path) if repo_path else None
        self.window = int(window)
        self.cache_ttl_s = float(cache_ttl_s)
        self.forgejo_url = forgejo_url.rstrip("/") if forgejo_url else None
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.branch = branch
        self.auth_token = auth_token
        if not self.forgejo_url and not self.repo_path:
            raise ValueError("VersionGate needs forgejo_url or repo_path")
        self._lock = threading.Lock()
        self._cached_hashes: frozenset[str] = frozenset()
        self._cached_at: float = 0.0
        self._head: str | None = None

    # ---- backend dispatch -----------------------------------------------

    def _refresh(self) -> None:
        # Try forgejo first when configured. If it fails AND we also
        # have a local repo path, fall back to git — this lets the
        # receiver keep working through a forgejo blip (e.g. both
        # services restarting on the same host) without flat-rejecting
        # every PUT with not-in-window. The local repo on the
        # receiver's own host is the closest thing to ground truth we
        # have when the canonical source is unreachable.
        source = "forgejo" if self.forgejo_url else "git"
        if self.forgejo_url:
            hashes, head = self._refresh_forgejo()
            if not hashes and self.repo_path is not None:
                log.info("forgejo refresh empty; falling back to local git "
                         "at %s", self.repo_path)
                hashes, head = self._refresh_git()
                if hashes:
                    source = "git-fallback"
        else:
            hashes, head = self._refresh_git()
        if not hashes:
            log.warning("version-gate refresh empty; keeping prior cache "
                        "of %d hashes", len(self._cached_hashes))
            self._cached_at = time.monotonic()
            return
        with self._lock:
            self._cached_hashes = frozenset(hashes)
            self._cached_at = time.monotonic()
            self._head = head
        log.info("version-gate refreshed: %d valid hashes, head=%s, source=%s",
                 len(hashes), head[:12] if head else "?", source)

    def _refresh_forgejo(self) -> tuple[set[str], str | None]:
        """GET /api/v1/repos/<owner>/<name>/commits?sha=<branch>&limit=<n>."""
        url = (
            f"{self.forgejo_url}/api/v1/repos/{self.repo_owner}/"
            f"{self.repo_name}/commits"
            f"?sha={urllib.parse.quote(self.branch)}&limit={self.window}"
        )
        req = urllib.request.Request(url)
        if self.auth_token:
            req.add_header("Authorization", f"token {self.auth_token}")
        try:
            with urllib.request.urlopen(req, timeout=3) as r:
                rows = json.loads(r.read().decode("utf-8"))
        except (urllib.request.HTTPError, urllib.request.URLError,
                json.JSONDecodeError, OSError) as e:
            log.warning("forgejo refresh failed (%s); keeping prior cache", e)
            return set(), self._head
        hashes: set[str] = set()
        head: str | None = None
        for i, row in enumerate(rows or []):
            sha = row.get("sha")
            if isinstance(sha, str) and len(sha) == 40:
                sha = sha.lower()
                hashes.add(sha)
                if i == 0:
                    head = sha
        return hashes, head

    def _refresh_git(self) -> tuple[set[str], str | None]:
        """`git log -n <window> --format=%H` from `repo_path`."""
        try:
            out = subprocess.run(
                ["git", "-C", str(self.repo_path),
                 "log", f"-n{self.window}", "--format=%H"],
                check=True, capture_output=True, text=True, timeout=3,
            ).stdout
        except (subprocess.SubprocessError, FileNotFoundError, OSError) as e:
            log.warning("git refresh failed (%s); keeping prior cache", e)
            return set(), self._head
        lines = [h.strip().lower() for h in out.splitlines() if h.strip()]
        head = lines[0] if lines else None
        return set(lines), head

    def _maybe_refresh(self) -> None:
        if (time.monotonic() - self._cached_at) > self.cache_ttl_s:
            self._refresh()

    def head(self) -> str | None:
        """Return the most recent valid commit (HEAD of the branch
        the receiver is mirroring). Used by the 412 response so the
        client knows what to pull to."""
        self._maybe_refresh()
        return self._head

    def valid_count(self) -> int:
        self._maybe_refresh()
        return len(self._cached_hashes)

    def check(self, commit: str | None) -> tuple[bool, str | None]:
        """Return (ok, reason). ``reason`` is None on success, a
        short string identifying the failure mode otherwise."""
        if not commit:
            return False, "missing"
        c = commit.strip().lower()
        if len(c) != 40 or not all(ch in "0123456789abcdef" for ch in c):
            return False, "bad-format"
        self._maybe_refresh()
        with self._lock:
            allowed = self._cached_hashes
        if c in allowed:
            return True, None
        return False, "not-in-window"