meta.json: stamp code_version (commit, branch, dirty) per episode

Closes a real reproducibility gap. Three weeks of bug fixes have
shipped (probe fix in 2707709, multi-signal classifier in 321ea63,
mandatory tier-4 in 265f3ad, etc.); without a per-episode
code_version, trainers can't tell which episodes came from buggy
pre-fix code and have to scan every tarball to guess.

Resolution priority (cached across episodes):
  1. $INSTALL_ROOT/VERSION (production — install-lab-host.sh writes
     it at install time since /opt/cis490 is a flat copy with no .git)
  2. git rev-parse HEAD from the repo root (dev clones)
  3. {"commit": "unknown", "source": "unknown"} so the field is always
     present (filterable)

Output shape, always present in meta.json:
  "code_version": {
    "commit": "<40-hex>" | "unknown",
    "branch": "<name>" | null,
    "dirty":  bool | null,
    "source": "VERSION-file" | "git" | "unknown"
  }

install-lab-host.sh writes VERSION at install time with the source
repo's git rev-parse HEAD + branch + clean-tree flag + install
timestamp. Lab-host agents that pull main + re-run install-lab-host.sh
get a fresh stamp automatically.

148/148 tests pass; test_episode_against_self_pid_produces_full_directory
asserts the field's presence + valid `source` value.
This commit is contained in:
max 2026-05-01 01:29:01 -05:00
parent 265f3ad313
commit 5c0bc9af8e
3 changed files with 104 additions and 0 deletions

View file

@ -29,6 +29,7 @@ from __future__ import annotations
import json import json
import logging import logging
import os import os
import subprocess
import threading import threading
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
@ -42,6 +43,79 @@ from samples.manifest import Sample
from .ulid import new_ulid from .ulid import new_ulid
# Repo root for the version probe — orchestrator/episode.py lives at
# <repo>/orchestrator/episode.py.
_REPO_ROOT = Path(__file__).resolve().parent.parent

# Cached so we don't fork `git` on every episode.
_CODE_VERSION_CACHE: dict | None = None


def _read_version_file(path: Path) -> dict | None:
    """Parse one VERSION stamp; return the normalized dict or None if unusable."""
    try:
        stamp = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, undecodable bytes, or malformed JSON
        # (JSONDecodeError and UnicodeDecodeError are both ValueErrors):
        # treat every one of them as "no stamp here" and let the caller
        # move on to the next candidate.
        return None
    if not isinstance(stamp, dict) or not stamp.get("commit"):
        return None
    # A stamp may carry only "commit"; fill the remaining keys so the
    # output shape is the same no matter where the version came from.
    stamp.setdefault("branch", None)
    stamp.setdefault("dirty", None)
    stamp.setdefault("source", "VERSION-file")
    return stamp


def _git_stdout(*args: str) -> str | None:
    """Run ``git -C <repo root> <args>``; return stripped stdout or None on failure."""
    try:
        proc = subprocess.run(
            ["git", "-C", str(_REPO_ROOT), *args],
            capture_output=True, text=True, timeout=2, check=True,
        )
    except (subprocess.SubprocessError, OSError):
        # Non-zero exit, timeout, or git not installed.
        return None
    return proc.stdout.strip()


def _probe_git() -> dict | None:
    """Build the version dict from the git checkout at the repo root, if any."""
    commit = _git_stdout("rev-parse", "HEAD")
    if not commit:
        return None
    # Branch and dirty are best-effort: a failure in either probe leaves
    # that one field as None instead of discarding the known commit.
    branch = _git_stdout("rev-parse", "--abbrev-ref", "HEAD") or None
    # `git status --porcelain` is empty iff the working tree is clean.
    porcelain = _git_stdout("status", "--porcelain")
    return {
        "commit": commit,
        "branch": branch,
        "dirty": None if porcelain is None else bool(porcelain),
        "source": "git",
    }


def _resolve_code_version() -> dict:
    """Return a small dict identifying the code that produced this episode.

    Order of resolution:

    1. A ``VERSION`` file, looked up first at the repo root and then at
       ``/opt/cis490/VERSION`` (written by install-lab-host.sh at
       install time; the typical production path, since /opt/cis490
       doesn't carry a .git/ dir).
    2. ``git rev-parse HEAD`` from the repo root (dev clones).
    3. ``{"commit": "unknown", ...}`` so meta.json always has the field.

    Output shape (all four keys are always present; a VERSION file may
    contribute additional keys of its own)::

        {"commit": "<40-hex>" | "unknown",
         "branch": "<name>" | None,
         "dirty":  bool | None,
         "source": "VERSION-file" | "git" | "unknown"}

    The result is cached at module level so per-episode meta emission is
    free after the first read. Each call returns a fresh shallow copy so a
    caller that mutates its meta dict cannot corrupt the cached value.
    """
    global _CODE_VERSION_CACHE
    if _CODE_VERSION_CACHE is not None:
        return dict(_CODE_VERSION_CACHE)

    resolved: dict | None = None

    # 1. VERSION file (production install).
    for cand in (_REPO_ROOT / "VERSION", Path("/opt/cis490/VERSION")):
        resolved = _read_version_file(cand)
        if resolved is not None:
            break

    # 2. git rev-parse from repo root (dev clones).
    if resolved is None:
        resolved = _probe_git()

    # 3. Nothing worked: emit the placeholder so the field is still present.
    if resolved is None:
        resolved = {
            "commit": "unknown", "branch": None, "dirty": None, "source": "unknown",
        }

    _CODE_VERSION_CACHE = resolved
    return dict(resolved)
log = logging.getLogger("cis490.orchestrator") log = logging.getLogger("cis490.orchestrator")
SCHEMA_VERSION = 1 SCHEMA_VERSION = 1
@ -364,6 +438,7 @@ class EpisodeRunner:
return { return {
"episode_id": self.episode_id, "episode_id": self.episode_id,
"schema_version": SCHEMA_VERSION, "schema_version": SCHEMA_VERSION,
"code_version": _resolve_code_version(),
"started_at_wall": started_at_wall, "started_at_wall": started_at_wall,
"ended_at_wall": None, "ended_at_wall": None,
"host_fingerprint": { "host_fingerprint": {

View file

@ -69,6 +69,26 @@ install -d -o "$SERVICE_USER" -g "$SERVICE_USER" -m 0755 "$INSTALL_ROOT"
cp -aT "$REPO_ROOT" "$INSTALL_ROOT" cp -aT "$REPO_ROOT" "$INSTALL_ROOT"
chown -R "$SERVICE_USER":"$SERVICE_USER" "$INSTALL_ROOT" chown -R "$SERVICE_USER":"$SERVICE_USER" "$INSTALL_ROOT"
# Stamp a VERSION file at install time so episodes can record the
# code commit they were generated by. /opt/cis490 is a flat copy
# (no .git/), so we capture the source repo's HEAD here. Trainers
# read meta.json.code_version to filter out episodes from buggy
# pre-fix code.
#
# All git calls use `git -C "$REPO_ROOT"` instead of `cd`: a bare
# `cd` inside an `if` condition runs in the main shell and would
# change the installer's working directory for every later step.
if VC="$(git -C "$REPO_ROOT" rev-parse HEAD 2>/dev/null)"; then
    VB="$(git -C "$REPO_ROOT" rev-parse --abbrev-ref HEAD 2>/dev/null || echo unknown)"
    # `git status --porcelain` prints nothing iff the tree is clean.
    VD="false"
    if [[ -n "$(git -C "$REPO_ROOT" status --porcelain 2>/dev/null)" ]]; then
        VD="true"
    fi
    # One-line JSON document. NOTE: $VB is interpolated unescaped, so a
    # branch name containing a double quote would yield invalid JSON.
    printf '{"commit": "%s", "branch": "%s", "dirty": %s, "installed_at_wall": "%s"}\n' \
        "$VC" "$VB" "$VD" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        | install -o "$SERVICE_USER" -g "$SERVICE_USER" -m 0644 /dev/stdin \
            "$INSTALL_ROOT/VERSION"
    log "VERSION stamp: $VC ($VB)$([[ "$VD" == "true" ]] && echo " [dirty]")"
else
    log "WARN: $REPO_ROOT not a git checkout; episodes will record code_version.commit='unknown'"
fi
log "building venv" log "building venv"
if [[ "$USE_UV" -eq 1 ]]; then if [[ "$USE_UV" -eq 1 ]]; then
sudo -u "$SERVICE_USER" -- env HOME="$INSTALL_ROOT" \ sudo -u "$SERVICE_USER" -- env HOME="$INSTALL_ROOT" \

View file

@ -34,6 +34,15 @@ def test_episode_against_self_pid_produces_full_directory(tmp_path: Path) -> Non
meta = json.loads((d / "meta.json").read_text()) meta = json.loads((d / "meta.json").read_text())
assert meta["episode_id"] == result.episode_id assert meta["episode_id"] == result.episode_id
assert meta["schema_version"] == 1 assert meta["schema_version"] == 1
# code_version stamps which commit produced the episode so trainers
# can filter out pre-fix data without scanning every tarball.
assert "code_version" in meta
cv = meta["code_version"]
assert "commit" in cv and "source" in cv
# Source is "git" (we run tests in a git checkout) or "VERSION-file"
# (someone running tests against /opt/cis490/) or "unknown" (CI
# without git). All three are acceptable; the field is what matters.
assert cv["source"] in {"git", "VERSION-file", "unknown"}
assert meta["started_at_wall"] is not None assert meta["started_at_wall"] is not None
assert meta["ended_at_wall"] is not None assert meta["ended_at_wall"] is not None
assert meta["vm"]["target_pid"] == os.getpid() assert meta["vm"]["target_pid"] == os.getpid()