Merge commit '86a088c' into Dev_REL1_043026
This commit is contained in:
commit
7c35bf7d49
5 changed files with 85 additions and 5 deletions
|
|
@ -7,6 +7,7 @@ dependencies = [
|
|||
"starlette>=0.36",
|
||||
"uvicorn[standard]>=0.27",
|
||||
"msgpack>=1.0", # MSF RPC wire format for the Tier-3 exploit driver
|
||||
"pycdlib>=1.14", # build NoCloud cidata ISOs in pure Python
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
@ -17,7 +18,6 @@ dev = [
|
|||
"matplotlib>=3.8",
|
||||
"tornado>=6", # required by matplotlib's WebAgg interactive backend
|
||||
"paramiko>=3", # SSH client for in-guest control on images that support it
|
||||
"pycdlib>=1.14", # build NoCloud cidata ISOs in pure Python
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
|
|
|
|||
|
|
@ -199,6 +199,7 @@ if [[ -f "$ALPINE_IMG" && ! -f "$CIDATA_ISO" ]]; then
|
|||
log "WARN: cidata build failed; run tools/build_cidata.py manually"
|
||||
fi
|
||||
# Symlink the canonical paths the launchers look at, when missing.
|
||||
install -d -o "$SERVICE_USER" -g "$SERVICE_USER" -m 0755 "$INSTALL_ROOT/vm/images"
|
||||
ln -sf "$ALPINE_IMG" "$INSTALL_ROOT/vm/images/alpine-baseline.qcow2" 2>/dev/null || true
|
||||
ln -sf "$CIDATA_ISO" "$INSTALL_ROOT/vm/images/cidata.iso" 2>/dev/null || true
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,15 @@ class ShipResult:
|
|||
error: str | None
|
||||
|
||||
|
||||
class _CertNotReadyError(Exception):
|
||||
"""Configured cert/CA paths aren't on disk yet.
|
||||
|
||||
Raised during first-boot bring-up: install-lab-host.sh enables the
|
||||
shipper before the Pi has issued the mTLS leaf. The transport
|
||||
catches this, logs once, and retries on each request so the daemon
|
||||
self-heals when the cert lands."""
|
||||
|
||||
|
||||
def _build_ssl_context(rcv: ReceiverEndpoint) -> ssl.SSLContext | bool:
|
||||
"""Build an SSL context honoring the wg-pki CA bundle + client cert.
|
||||
|
||||
|
|
@ -57,6 +66,16 @@ def _build_ssl_context(rcv: ReceiverEndpoint) -> ssl.SSLContext | bool:
|
|||
we use a context so we can attach the client cert for mTLS."""
|
||||
if not rcv.url.lower().startswith("https://"):
|
||||
return False
|
||||
# Pre-flight check the configured paths so we raise a recoverable
|
||||
# error here instead of letting ssl raise FileNotFoundError deep
|
||||
# inside create_default_context / load_cert_chain.
|
||||
for label, path in (
|
||||
("ca_bundle", rcv.ca_bundle),
|
||||
("client_cert", rcv.client_cert),
|
||||
("client_key", rcv.client_key),
|
||||
):
|
||||
if path and not Path(path).exists():
|
||||
raise _CertNotReadyError(f"{label} path missing: {path}")
|
||||
ctx = ssl.create_default_context(
|
||||
cafile=str(rcv.ca_bundle) if rcv.ca_bundle else None,
|
||||
)
|
||||
|
|
@ -74,11 +93,41 @@ def _build_ssl_context(rcv: ReceiverEndpoint) -> ssl.SSLContext | bool:
|
|||
class ShipperTransport:
|
||||
def __init__(self, cfg: ShipperConfig) -> None:
    """Store the config and make one non-fatal attempt to build the SSL context.

    Args:
        cfg: Shipper configuration; ``cfg.receiver`` is the endpoint the
            SSL context is built for.
    """
    self.cfg = cfg
    # None means "not built yet". Once built it is either False (plain
    # http, no TLS verification object needed) or an SSLContext (https).
    # The context must NOT be built eagerly here: a missing cert would
    # raise out of the constructor and take the whole unit down.
    self._verify: ssl.SSLContext | bool | None = None
    # Tracks whether the "waiting on mTLS material" warning was already
    # logged, so the retry-per-request loop does not spam the journal.
    self._cert_warned = False
    # Try once at construction; if certs aren't on disk yet, defer.
    # Each request will retry the build until it succeeds, so
    # systemd doesn't crash-loop the unit during first-boot.
    self._try_build_verify()
|
||||
|
||||
def _try_build_verify(self) -> bool:
    """(Re)build the SSL context unless it has already been built.

    Returns:
        True if the transport is ready to make requests; False if the
        configured cert material isn't on disk yet.
    """
    if self._verify is not None:
        return True  # already built (False for http, context for https)

    try:
        self._verify = _build_ssl_context(self.cfg.receiver)
    except _CertNotReadyError as e:
        # Warn only on the first failure; later retries stay quiet.
        if not self._cert_warned:
            log.warning(
                "shipper waiting on mTLS material (%s); will retry each request",
                e,
            )
            self._cert_warned = True
        return False

    # Build succeeded. If we had warned earlier, announce the recovery
    # once and re-arm the warning flag.
    if self._cert_warned:
        log.info("mTLS material now on disk; shipper transport ready")
        self._cert_warned = False
    return True
|
||||
|
||||
# ---- ping ----------------------------------------------------------
|
||||
|
||||
def ping(self) -> PingResult:
|
||||
if not self._try_build_verify():
|
||||
return PingResult(
|
||||
ok=False, status_code=0, body=None,
|
||||
error="mTLS material not yet on disk; waiting for cert delivery",
|
||||
)
|
||||
url = f"{self.cfg.receiver.url}/v1/ping"
|
||||
headers = self._common_headers()
|
||||
try:
|
||||
|
|
@ -110,6 +159,12 @@ class ShipperTransport:
|
|||
tarball_path: Path,
|
||||
sha256_hex: str,
|
||||
) -> ShipResult:
|
||||
if not self._try_build_verify():
|
||||
return ShipResult(
|
||||
status="transient", status_code=0,
|
||||
sha256=None, body=None,
|
||||
error="mTLS material not yet on disk; waiting for cert delivery",
|
||||
)
|
||||
url = (
|
||||
f"{self.cfg.receiver.url}/v1/episodes/"
|
||||
f"{self.cfg.host_id}/{episode_id}.tar.zst"
|
||||
|
|
|
|||
|
|
@ -191,6 +191,28 @@ def test_ping_fails_when_receiver_unreachable(tmp_path: Path) -> None:
|
|||
assert res.error is not None
|
||||
|
||||
|
||||
def test_transport_defers_when_ca_bundle_missing(tmp_path: Path) -> None:
    """Issue #11: construction must survive a missing CA bundle.

    First-boot bring-up enables the shipper before the Pi has issued the
    mTLS leaf. Construction must not crash, and ping should report a
    deferred (non-ok) result until the cert lands.
    """
    missing_ca = tmp_path / "not-yet" / "wg-ca.pem"
    # Precondition: the path really is absent, otherwise the test proves nothing.
    assert not missing_ca.exists()

    cfg = ShipperConfig(
        host_id="lab1",
        data_root=tmp_path / "lab-data",
        receiver=ReceiverEndpoint(
            url="https://collector.wg",
            ca_bundle=missing_ca,
        ),
    )

    # Construction MUST succeed even though the CA bundle is missing —
    # this is the bug fix: previously raised FileNotFoundError out of
    # ssl.create_default_context, crashing the systemd unit.
    transport = ShipperTransport(cfg)

    res = transport.ping()
    assert res.ok is False
    # No request was attempted, so there is no HTTP status to report.
    assert res.status_code == 0
    assert res.error is not None and "mTLS material" in res.error
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tar + ship
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -102,9 +102,9 @@ _JSON_MODE = False
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _run(cmd: list[str], *, timeout: float = 5.0) -> tuple[int, str, str]:
|
||||
def _run(cmd: list[str], *, timeout: float = 5.0, cwd: str | None = None) -> tuple[int, str, str]:
|
||||
try:
|
||||
p = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
|
||||
p = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, cwd=cwd)
|
||||
return p.returncode, p.stdout.strip(), p.stderr.strip()
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired) as e:
|
||||
return -1, "", str(e)
|
||||
|
|
@ -558,7 +558,7 @@ def check_end_to_end(report: Report) -> None:
|
|||
rc, out, err = _run([
|
||||
"/opt/cis490/.venv/bin/python", "-m", "shipper",
|
||||
"--config", cfg, "--ping",
|
||||
], timeout=15.0)
|
||||
], timeout=15.0, cwd="/opt/cis490")
|
||||
if rc == 0 and '"ok": true' in out:
|
||||
report.add(Check("e2e: cis490-shipper --ping", "ok",
|
||||
detail="200 OK"))
|
||||
|
|
@ -588,6 +588,8 @@ def main(argv: list[str] | None = None) -> int:
|
|||
_JSON_MODE = args.json
|
||||
|
||||
repo_root = Path(__file__).resolve().parent.parent
|
||||
if str(repo_root) not in sys.path:
|
||||
sys.path.insert(0, str(repo_root))
|
||||
if not _JSON_MODE:
|
||||
print(f"{_ANSI_BOLD}cis490-doctor{_ANSI_RESET} role={args.role} repo={repo_root}\n")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue