"""Build a NoCloud cidata ISO for cloud-init.

Cirros 0.6.x — and most cloud images — look for a NoCloud datasource at
boot: an ISO9660 volume labeled ``cidata`` containing two files,
``user-data`` and ``meta-data``. We attach it as a second drive so
cloud-init proceeds without spending ~17 minutes timing out trying to
reach a non-existent metadata service.

This script is intentionally self-contained and uses only pycdlib (pure
Python) — no system mkisofs/xorriso/cloud-localds dependency.

Usage:
    uv run python tools/build_cidata.py vm/images/cidata.iso

The defaults create a ``cis490`` user (password ``cis490``), set the same
password for root, enable SSH password auth (so future Metasploit-class
images work without changes), and set a hostname. Unless
``--no-embed-agent`` is given, the in-guest agent and its OpenRC service
are embedded in user-data as well. Override via flags if needed.
"""

from __future__ import annotations

import argparse
import io
import sys
from pathlib import Path

import pycdlib

DEFAULT_USER_DATA_HEAD = """\
#cloud-config
hostname: cis490
manage_etc_hosts: true
users:
  - name: cis490
    plain_text_passwd: cis490
    lock_passwd: false
    sudo: ALL=(ALL) NOPASSWD:ALL
    shell: /bin/sh
ssh_pwauth: true
disable_root: false
chpasswd:
  expire: false
  list: |
    root:cis490
    cis490:cis490
"""

# OpenRC service file shipped inside the guest. Alpine uses OpenRC;
# the runcmd at the bottom of user-data wires it up on first boot.
OPENRC_SERVICE = """\
#!/sbin/openrc-run
description="CIS490 in-guest telemetry agent"
command="/usr/local/bin/cis490-agent"
command_args="--port /dev/virtio-ports/cis490.guest.agent"
command_background=true
pidfile="/run/cis490-agent.pid"
output_log="/var/log/cis490-agent.log"
error_log="/var/log/cis490-agent.log"

depend() {
    need localmount
}
"""

DEFAULT_META_DATA = """\
instance-id: cis490-vm-001
local-hostname: cis490
"""


def _indent(text: str, n: int) -> str:
    """Return ``text`` with every non-empty line prefixed by ``n`` spaces.

    Empty lines are left empty (no trailing whitespace) and the result has
    no trailing newline; callers append their own.
    """
    pad = " " * n
    return "\n".join(pad + line if line else line for line in text.splitlines())


def build_user_data(*, embed_agent: bool, agent_path: Path | None) -> bytes:
    """Build a cloud-init user-data document.

    When ``embed_agent`` is True, also stuff the in-guest agent + an OpenRC
    service into ``write_files`` and arrange to start the service on first
    boot.

    Args:
        embed_agent: Whether to embed the agent script and service file.
        agent_path: Location of the agent script. ``None`` selects
            ``vm/guest-agent/cis490_agent.py`` relative to the directory
            two levels above this file.

    Returns:
        The user-data document encoded as UTF-8 bytes.

    Raises:
        FileNotFoundError: If ``embed_agent`` is True and the agent script
            does not exist.
    """
    head = DEFAULT_USER_DATA_HEAD
    if not embed_agent:
        return (
            head
            + 'runcmd:\n  - [ sh, -c, "echo CIS490_BOOT_OK > /tmp/.cis490-boot" ]\n'
        ).encode()

    if agent_path is None:
        agent_path = (
            Path(__file__).resolve().parent.parent
            / "vm"
            / "guest-agent"
            / "cis490_agent.py"
        )
    if not agent_path.exists():
        raise FileNotFoundError(f"agent script not found: {agent_path}")
    # Read as UTF-8 explicitly: the document is encoded as UTF-8 below, so
    # decoding with the platform locale could corrupt non-ASCII characters.
    agent_src = agent_path.read_text(encoding="utf-8")

    # The Alpine cloud image (alpine-virt-3.X.Y-x86_64.qcow2) does not
    # ship python3 by default, so the agent's `#!/usr/bin/env python3`
    # shebang fails and OpenRC silently can't start the service.
    # Result: telemetry-guest.jsonl is empty on every episode. Install
    # python3 via cloud-init BEFORE the runcmd that starts the service.
    # Refs PIPELINE.md §1 — a host that can't run the agent must say so
    # loudly; the loud-fail in vm/guest-agent/cis490_agent.py + this
    # explicit dep install close the silent-downgrade loop.
    body = head + (
        "packages:\n"
        "  - python3\n"
        "package_update: true\n"
        "write_files:\n"
        "  - path: /usr/local/bin/cis490-agent\n"
        "    permissions: '0755'\n"
        "    owner: root:root\n"
        "    content: |\n"
        f"{_indent(agent_src, 6)}\n"
        "  - path: /etc/init.d/cis490-agent\n"
        "    permissions: '0755'\n"
        "    owner: root:root\n"
        "    content: |\n"
        f"{_indent(OPENRC_SERVICE, 6)}\n"
        "runcmd:\n"
        '  - [ sh, -c, "echo CIS490_BOOT_OK > /tmp/.cis490-boot" ]\n'
        '  - [ sh, -c, "command -v rc-update >/dev/null && rc-update add cis490-agent default || true" ]\n'
        '  - [ sh, -c, "command -v rc-service >/dev/null && rc-service cis490-agent start || true" ]\n'
    )
    return body.encode()


def build_cidata(out_path: Path, user_data: bytes, meta_data: bytes) -> None:
    """Write an ISO image labeled ``cidata`` holding the two NoCloud files.

    Args:
        out_path: Destination path of the ISO image.
        user_data: Raw bytes stored as ``user-data``.
        meta_data: Raw bytes stored as ``meta-data``.
    """
    iso = pycdlib.PyCdlib()
    # Joliet=3 + Rock Ridge so cloud-init reads filenames correctly on Linux.
    iso.new(joliet=3, vol_ident="cidata", interchange_level=3, rock_ridge="1.09")
    try:
        iso.add_fp(
            io.BytesIO(user_data),
            len(user_data),
            iso_path="/USERDATA.;1",
            rr_name="user-data",
            joliet_path="/user-data",
        )
        iso.add_fp(
            io.BytesIO(meta_data),
            len(meta_data),
            iso_path="/METADATA.;1",
            rr_name="meta-data",
            joliet_path="/meta-data",
        )
        iso.write(str(out_path))
    finally:
        # Close the pycdlib object even when adding files or writing fails.
        iso.close()


def main(argv: list[str] | None = None) -> int:
    """Parse command-line flags, build the ISO and report its size.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        Process exit status (0 on success).
    """
    parser = argparse.ArgumentParser(prog="build_cidata")
    parser.add_argument("out_path", type=Path)
    parser.add_argument(
        "--user-data",
        type=Path,
        default=None,
        help="path to a custom cloud-config user-data file",
    )
    parser.add_argument(
        "--meta-data",
        type=Path,
        default=None,
        help="path to a custom meta-data file",
    )
    parser.add_argument(
        "--no-embed-agent",
        action="store_true",
        help="don't bake the in-guest agent into user-data",
    )
    parser.add_argument(
        "--agent-path",
        type=Path,
        default=None,
        help="path to the in-guest agent (default: vm/guest-agent/cis490_agent.py)",
    )
    args = parser.parse_args(argv)

    # A custom user-data file replaces the generated document entirely.
    if args.user_data is not None:
        user_data = args.user_data.read_bytes()
    else:
        user_data = build_user_data(
            embed_agent=not args.no_embed_agent,
            agent_path=args.agent_path,
        )

    if args.meta_data is not None:
        meta_data = args.meta_data.read_bytes()
    else:
        meta_data = DEFAULT_META_DATA.encode()

    args.out_path.parent.mkdir(parents=True, exist_ok=True)
    build_cidata(args.out_path, user_data, meta_data)
    print(f"wrote {args.out_path} ({args.out_path.stat().st_size} bytes)")
    return 0


if __name__ == "__main__":
    sys.exit(main())