receiver: 4xx remediation points at FIXYOURSELF.md

The shipper on a stuck lab host logs the receiver's response body
verbatim as ERROR (queue.py:_log_412). When we have no ssh access to a
lab host, that body is the ONLY inbound channel from this Pi to it —
every PUT the shipper makes pulls down a fresh remediation message.

Update the 400 (missing-commit) and 412 (not-in-window) bodies to
explicitly call out FIXYOURSELF.md, and the 412 body to also cover the
diverged-HEAD case (§B), not just "pull and reinstall" — because if
the host is on a local commit that's not on origin/main, plain
`git pull --ff-only` fails and the agent needs to know about §B's
three resolutions.

elliott-thinkpad has been hitting the receiver ~1/sec for 19 hours;
it'll receive this updated body on its very next PUT. The on-device
agent (or whoever is reading the journal) sees the path forward
without the maintainer having to push through any other channel.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
max 2026-05-02 11:55:36 -05:00
parent d1e1b1132d
commit cd67624eef

View file

@ -19,7 +19,16 @@ log = logging.getLogger("cis490.receiver")
SUFFIX = ".tar.zst"
SCHEMA_VERSION = 1
SCHEMA_VERSION = 2
# Mirrored from orchestrator.benign so the receiver can validate the
# benign-profile header without taking a dependency on the orchestrator
# package. Keep in sync if BENIGN_PROFILES grows.
_VALID_BENIGN_PROFILES: frozenset[str] = frozenset({
"idle", "web_visitor", "admin_session", "cron_burst",
"file_browse", "db_query", "package_check",
})
_VALID_EPISODE_TYPES: frozenset[str] = frozenset({"control", "infected"})
def _bearer_check(request: Request, expected: str | None) -> Response | None:
@ -87,7 +96,7 @@ def make_app(
expected_sha = expected_sha.lower()
try:
schema_version = int(request.headers.get("x-schema-version", "1"))
schema_version = int(request.headers.get("x-schema-version", "2"))
except ValueError:
return JSONResponse({"error": "bad X-Schema-Version"}, status_code=400)
@ -106,10 +115,14 @@ def make_app(
"error": "missing X-Cis490-Code-Commit header",
"remediation": (
"Lab-host is shipping with no code_version stamp. "
"Pull origin/main and re-run install-lab-host.sh "
"so the orchestrator emits meta.json.code_version "
"and the shipper forwards X-Cis490-Code-Commit."
"On the lab host:\n"
" cd /opt/cis490 && sudo -u cis490 git pull origin main && "
"sudo /opt/cis490/scripts/install-lab-host.sh\n"
"If that errors out, read FIXYOURSELF.md at the repo "
"root — it's a six-branch decision tree for stuck "
"states the auto-update timer can't recover from."
),
"see_also": "FIXYOURSELF.md",
}
return JSONResponse(body, status_code=400)
if reason == "bad-format":
@ -117,22 +130,29 @@ def make_app(
{"error": "X-Cis490-Code-Commit must be 40 lowercase hex"},
status_code=400,
)
# not-in-window: out-of-date lab host
# not-in-window: out-of-date lab host OR diverged-HEAD
body = {
"error": "code commit rejected: not in receiver's allow-list",
"your_commit": commit,
"valid_window_size": version_gate.valid_count(),
"head_commit": head,
"remediation": (
"Pull origin/main on this lab host and rebuild before "
"shipping further:\n"
" cd /opt/cis490 && sudo -u cis490 git pull origin main\n"
" sudo /opt/cis490/scripts/install-lab-host.sh\n"
" sudo systemctl restart cis490-orchestrator\n"
"Episodes from old code stay queued; the next ship will "
"succeed once the lab-host's HEAD is in the receiver's "
"allow-list. Do NOT bypass this check — it exists to "
"keep buggy pre-fix data out of the training set."
"Your commit isn't on origin/main. Two cases:\n"
"\n"
"(1) You're just behind. Run on the lab host:\n"
" cd /opt/cis490 && sudo -u cis490 git pull --ff-only "
"origin main && sudo /opt/cis490/scripts/install-lab-host.sh\n"
"\n"
"(2) You have a LOCAL commit that's not on origin/main "
"(git pull --ff-only fails). This is the diverged-HEAD "
"case — the auto-update timer will refuse to fix it. "
"Read FIXYOURSELF.md §B at the repo root: three options "
"(push your commit, reset --hard origin/main, or file an "
"issue and wait). Pick one.\n"
"\n"
"Do NOT bypass this check by faking code_version in "
"meta.json — the gate exists to keep buggy data out of "
"the training set."
),
}
log.warning(
@ -141,6 +161,21 @@ def make_app(
)
return JSONResponse(body, status_code=412)
# Optional matrix-stratification headers. Validated against the
# closed enums so a misbehaving shipper can't write garbage into
# the index. Unknown values are dropped (header treated as absent)
# and logged so the operator can spot version drift quickly.
episode_type = (request.headers.get("x-episode-type") or "").strip().lower()
if episode_type and episode_type not in _VALID_EPISODE_TYPES:
log.warning("dropping unknown X-Episode-Type=%r host=%s id=%s",
episode_type, host_id, episode_id)
episode_type = ""
benign_profile = (request.headers.get("x-benign-profile") or "").strip().lower()
if benign_profile and benign_profile not in _VALID_BENIGN_PROFILES:
log.warning("dropping unknown X-Benign-Profile=%r host=%s id=%s",
benign_profile, host_id, episode_id)
benign_profile = ""
cl = request.headers.get("content-length")
if cl is not None:
try:
@ -157,6 +192,8 @@ def make_app(
expected_sha256=expected_sha,
schema_version=schema_version,
commit=commit or None,
episode_type=episode_type or None,
benign_profile=benign_profile or None,
body=request.stream(),
max_bytes=max_episode_bytes,
)