#!/usr/bin/env bash
# Pull training data from the receiver Pi to a local trainer box.
#
# Run this on the trainer (e.g. the Windows/2070-Super box via WSL or a
# Linux desktop). Requires WireGuard up to 10.100.0.1 with `cis490-trainer`
# enrollment so SSH key auth works.
#
# Environment overrides (all optional):
#   PI_HOST    host to pull from                   (default: 10.100.0.1)
#   PI_USER    SSH user on that host               (default: max)
#   LOCAL_DIR  local destination, created if absent (default: ./episodes)
#
# What gets pulled:
#   /var/lib/cis490/episodes/    raw .tar.zst episode tarballs (~3GB)
#                                -> ${LOCAL_DIR}/
#   /var/lib/cis490/index.jsonl  shipped-episode index
#                                -> ${LOCAL_DIR}/index.jsonl
#
# Also needed for the next step, but NOT pulled by this script (it is
# committed in the repo):
#   data/processed/validation_v1.parquet   validator output
#
# Once those are local you can run:
#   uv run --group training python training/build_features.py \
#     --validation data/processed/validation_v1.parquet \
#     --store ./episodes \
#     --out-dir data/processed
#
# Then training/train_gbt.py and training/train_nn.py.

set -euo pipefail

PI_HOST="${PI_HOST:-10.100.0.1}"
PI_USER="${PI_USER:-max}"
LOCAL_DIR="${LOCAL_DIR:-./episodes}"

# Directory on the Pi that holds the episode store and its index.
readonly REMOTE_ROOT="/var/lib/cis490"

# Print a message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the names of the first few non-hidden entries of a directory,
# one per line, in glob (sorted) order.
#
# This replaces `ls DIR | head`: under `set -o pipefail`, `head` closing
# the pipe early can kill `ls` with SIGPIPE on a large directory, which
# would make the whole script fail after a successful sync.
#
# Arguments: $1 - directory to list
#            $2 - maximum number of names to print (default 10)
# Outputs:   entry names on stdout; nothing if the directory is empty
#######################################
list_first_entries() {
  local dir=$1
  local limit=${2:-10}
  local -a entries=()
  local entry

  # nullglob: an empty directory yields an empty array, not a literal '*'.
  shopt -s nullglob
  entries=("${dir}"/*)
  shopt -u nullglob

  # Guard the expansion: older bash treats an empty array as unset under -u.
  if ((${#entries[@]} == 0)); then
    return 0
  fi

  for entry in "${entries[@]:0:limit}"; do
    printf '%s\n' "${entry##*/}"
  done
}

main() {
  local remote="${PI_USER}@${PI_HOST}"

  command -v rsync >/dev/null || die "rsync is required but was not found in PATH"

  mkdir -p -- "${LOCAL_DIR}"

  printf '%s\n' "→ rsyncing episodes from ${remote}:${REMOTE_ROOT}/episodes/"
  # Trailing slash on the source copies the directory's contents rather than
  # the directory itself. '*.partial' files are skipped.
  # NOTE: --info=progress2 needs rsync 3.1 or newer.
  rsync -ah --info=progress2 \
    --exclude='*.partial' \
    "${remote}:${REMOTE_ROOT}/episodes/" \
    "${LOCAL_DIR}/"

  printf '%s\n' "→ rsyncing index.jsonl"
  rsync -a --info=progress2 \
    "${remote}:${REMOTE_ROOT}/index.jsonl" \
    "${LOCAL_DIR}/index.jsonl"

  printf '%s\n' "done. ${LOCAL_DIR} contains:"
  du -sh -- "${LOCAL_DIR}"
  list_first_entries "${LOCAL_DIR}" 10
}

main "$@"