Some checks failed
Lean Action CI / build (push) Has been cancelled
Two structural changes landed together as one coherent body of work.
## 1. Engine is name-clean from higher-order projects
The engine no longer carries "topolei" in its own naming surface.
Higher-order projects depend on the engine, not vice versa, so the
engine should be self-named.
topolei-cubical (Cargo) → cubical-transport
libtopolei_cubical.a → libcubical_transport.a
topolei_cubical.h → cubical_transport.h
TOPOLEI_FFI_ABI_VERSION → CUBICAL_TRANSPORT_ABI_VERSION
topolei_cubical_* (14 FFI fns) → cubical_transport_*
topolei_shim_* (9 shim fns) → cubical_transport_shim_*
Inter-repo references describing topolei as a downstream consumer
(README, KERNEL_BOUNDARY.md, INDUCTIVE_TYPES.md, etc.) are preserved
as legitimate dependency-direction descriptions.
## 2. Universe-stratified, dependently-typed CType
CType : ULevel → Type (genuinely indexed inductive)
with dependent pi/sigma carrying a binder name, a lift constructor
for cumulativity, and parameter lists of Σ-packaged types.
Per CCHM rules:
· univ ℓ : CType (ℓ.succ)
· pi/sigma : CType (max ℓ_A ℓ_B), with named binder
· path A : at A's level
· glue T A : T and A at same level
· ind : at user-chosen level (heterogeneous-level params)
· interval : CType .zero
· lift : CType (ℓ.succ), data-preserving
Every existing engine module cascades through {ℓ : ULevel} implicits
on functions/theorems, pi/sigma binder updates, and Σ-packaged params
lists. CTerm stays un-indexed (universe lives on CType).
## 3. Substrate machinery for the cascade
Universe.lean — ULevel inductive + max algebra (assoc, comm, etc.),
all theorems proven structurally.
Syntax.lean — adds SkeletalCType enum + CType.skeleton level-erasure
projection + per-constructor skeleton_* simp lemmas +
CType.ind_skeleton_ne_pi disjointness lemma. Used to
discharge cross-level HEq cases in TransportLaws/CompLaws
without invoking K.
## 4. Rust ABI v3 → v4
Lean 4 keeps implicit {ℓ : ULevel} parameters at runtime as constructor
fields, in declaration order interleaved with explicit args (verified
via probeLayout instrumentation). Layout for level-bearing constructors
documented in cubical_transport.h §"v4 layout tables".
CType.pi : 5 fields — [ℓ_d, ℓ_c, var, A, B]
CType.path : 4 fields — [ℓ, A, a, b]
CType.glue : 9 fields — [ℓ, φ, T, f, fInv, sec, ret, coh, A]
CType.ind : 3 fields — [ℓ, S, params]
CType.lift : 2 fields — [ℓ, A]
CTerm.transp : 5 fields — [i, ℓ, A, φ, t] (i precedes ℓ)
CVal.vCompFun : 9 fields — [ℓ_d, ℓ_c, env, i, dom, cod, φ, u, t]
... etc
All Rust marshalling (value.rs, eval.rs, transport.rs, composition.rs,
glue.rs, beta.rs, dim_absent.rs, readback.rs, subst.rs, ffi.rs, tags.rs)
updated to match.
## Discipline
· Zero sorry in CubicalTransport/.
· Zero noncomputable instances; zero Classical.propDecidable shortcuts.
· No CType.level projection (the level lives in the inductive's index).
· No parallel CTypeU type.
· No stub substrate types (def Ω := CType.univ etc.).
· Tests restored to full coverage (EvalTest 623 lines, FFITest 351
lines with classifier-runtime tests intact).
## Verification
cd cubical-transport-hott-lean4
lake build # 48 jobs OK
./.lake/build/bin/cubical-test
# ── 49/49 passed ──
# ── 46/46 properties passed ──
# PASS: all smoke + property tests
cd ../topolei
lake build # 90 jobs OK
./.lake/build/bin/probe-test
# ── 7/7 probes passed ──
# PASS: GPU output matches Lean ShaderSemantic
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
128 lines
4.6 KiB
Text
128 lines
4.6 KiB
Text
import CubicalTransport.Readback
|
|
import CubicalTransport.FFI
|
|
|
|
/-!
|
|
CubicalBench.lean — Phase D.2 performance benchmarks.
|
|
|
|
Measures throughput of the Rust-backed cubical evaluator. Uses an
|
|
`IO.Ref` accumulator to prevent dead-code elimination (the discarded
|
|
results of pure functions can otherwise be optimized away, even
|
|
through `extern` boundaries).
|
|
|
|
Invoke: `./.lake/build/bin/cubical-bench`
|
|
-/
|
|
|
|
-- ── Helpers ────────────────────────────────────────────────────────────────
|
|
|
|
/-- Encode a CVal into a single-byte tag for accumulator accounting.
|
|
Forces the full `eval` result to be observed. -/
|
|
def cvalTag : CVal → UInt8
|
|
| .vneu _ => 2
|
|
| .vlam _ _ _ => 0
|
|
| .vplam _ _ _ => 1
|
|
| .vpair _ _ => 8
|
|
| _ => 255
|
|
|
|
def dimExprTag : DimExpr → UInt8
|
|
| .zero => 0
|
|
| .one => 1
|
|
| .var _ => 2
|
|
| .inv _ => 3
|
|
| .meet _ _ => 4
|
|
| .join _ _ => 5
|
|
|
|
def faceTag : FaceFormula → UInt8
|
|
| .bot => 0 | .top => 1
|
|
| .eq0 _ => 2 | .eq1 _ => 3
|
|
| .meet _ _ => 4 | .join _ _ => 5
|
|
|
|
def ctermTag : CTerm → UInt8
|
|
| .var _ => 0
|
|
| .lam _ _ => 1
|
|
| .app _ _ => 2
|
|
| .plam _ _ => 3
|
|
| .papp _ _ => 4
|
|
| .pair _ _ => 10
|
|
| _ => 255
|
|
|
|
def timedRun (label : String) (iters : Nat) (work : IO UInt8) :
|
|
IO UInt64 := do
|
|
let t0 ← IO.monoNanosNow
|
|
let mut acc : UInt64 := 0
|
|
for _ in [0:iters] do
|
|
let x ← work
|
|
acc := acc + x.toUInt64
|
|
let t1 ← IO.monoNanosNow
|
|
let totalNs := (t1 - t0)
|
|
let nsPerIter := if iters > 0 then totalNs / iters else totalNs
|
|
let totalMs := totalNs.toFloat / 1_000_000.0
|
|
IO.println s!" {label}: {iters} iters in {totalMs}ms ({nsPerIter} ns/iter)"
|
|
return acc
|
|
|
|
-- ── Workloads (each returns a UInt8 derived from the result) ──────────────
|
|
|
|
def work_dimexpr_normalize : IO UInt8 := do
|
|
let exprs : List DimExpr := [
|
|
.inv .zero,
|
|
.inv (.inv (.var ⟨"i"⟩)),
|
|
.meet (.var ⟨"i"⟩) (.inv .zero),
|
|
.join (.inv (.var ⟨"j"⟩)) .zero,
|
|
.inv (.meet (.var ⟨"i"⟩) (.join (.var ⟨"j"⟩) .one))
|
|
]
|
|
let mut acc : UInt8 := 0
|
|
for e in exprs do
|
|
acc := acc ^^^ (dimExprTag (DimExpr.normalize e))
|
|
return acc
|
|
|
|
def work_face_normalize : IO UInt8 := do
|
|
let faces : List FaceFormula := [
|
|
.meet .top (.eq0 ⟨"i"⟩),
|
|
.meet (.eq0 ⟨"i"⟩) .bot,
|
|
.join .bot (.eq1 ⟨"j"⟩),
|
|
.join (.eq0 ⟨"i"⟩) .top,
|
|
.meet (.meet .top (.eq0 ⟨"i"⟩)) (.meet (.eq1 ⟨"j"⟩) .top)
|
|
]
|
|
let mut acc : UInt8 := 0
|
|
for φ in faces do
|
|
acc := acc ^^^ (faceTag (FaceFormula.normalize φ))
|
|
return acc
|
|
|
|
def work_lambda_beta : IO UInt8 := do
|
|
let v1 := eval .nil (.app (.lam "x" (.var "x")) (.var "y"))
|
|
let v2 := eval .nil
|
|
(.app (.app (.lam "x" (.lam "y" (.var "x"))) (.var "a")) (.var "b"))
|
|
let v3 := eval .nil
|
|
(.app (.app (.lam "f" (.lam "x" (.app (.var "f") (.var "x"))))
|
|
(.var "g")) (.var "z"))
|
|
return (cvalTag v1) ^^^ (cvalTag v2) ^^^ (cvalTag v3)
|
|
|
|
def work_sigma : IO UInt8 := do
|
|
let v1 := eval .nil (.fst (.pair (.var "a") (.var "b")))
|
|
let v2 := eval .nil (.snd (.pair (.var "a") (.var "b")))
|
|
let v3 := eval .nil (.fst (.snd (.pair (.var "x") (.pair (.var "y") (.var "z")))))
|
|
return (cvalTag v1) ^^^ (cvalTag v2) ^^^ (cvalTag v3)
|
|
|
|
def work_readback : IO UInt8 := do
|
|
let t : CTerm := .lam "f" (.lam "x" (.app (.var "f") (.var "x")))
|
|
let v := eval .nil t
|
|
let back := readback v
|
|
return (cvalTag v) ^^^ (ctermTag back)
|
|
|
|
-- ── Driver ─────────────────────────────────────────────────────────────────
|
|
|
|
def main : IO Unit := do
|
|
IO.println "── Cubical-transport perf benchmarks ──"
|
|
IO.println "Rust-backed evaluator (native staticlib). Accumulator prevents DCE."
|
|
IO.println ""
|
|
let iters := 100_000
|
|
let mut tally : UInt64 := 0
|
|
tally := tally + (← timedRun "DimExpr.normalize (5 ops/iter)" iters work_dimexpr_normalize)
|
|
tally := tally + (← timedRun "FaceFormula.normalize (5 ops/iter)" iters work_face_normalize)
|
|
tally := tally + (← timedRun "λ β-reductions (3 ops/iter)" iters work_lambda_beta)
|
|
tally := tally + (← timedRun "Σ projections (3 ops/iter)" iters work_sigma)
|
|
tally := tally + (← timedRun "readback λ-form (1 op/iter)" iters work_readback)
|
|
IO.println ""
|
|
IO.println s!"Accumulator (forces observation): {tally}"
|
|
IO.println ""
|
|
IO.println "ns/iter divides total elapsed by iterations; each iter"
|
|
IO.println "does N operations listed in parens, plus accumulator work."
|