test: measure VC discharging separately in Sym mvcgen benchmarks (#12551)

This PR refactors the benchmark driver of the Sym mvcgen benchmarks such
that time spent for discharging VCs and instantiation of MVars is
measured separately from VC generation.

Example output:

```
baseline_add_sub_cancel
goal_100: 57 ms, 0 VCs, kernel: 22 ms
goal_500: 353 ms, 0 VCs, kernel: 160 ms
goal_1000: 755 ms, 0 VCs, kernel: 437 ms

vcgen_add_sub_cancel
goal_100: 36 ms, 1 VCs by grind: 21 ms, kernel: 35 ms
goal_500: 149 ms, 1 VCs by grind: 115 ms, kernel: 214 ms
goal_1000: 314 ms, 1 VCs by grind: 249 ms, kernel: 478 ms

vcgen_deep_add_sub_cancel
goal_100: 65 ms, 1 VCs by grind: 23 ms, kernel: 82 ms
goal_500: 262 ms, 1 VCs by grind: 123 ms, kernel: 539 ms
goal_1000: 611 ms, 1 VCs by grind: 292 ms, kernel: 1075 ms

vcgen_get_throw_set
goal_100: 87 ms, 101 VCs by sorry: 16 ms, kernel: 93 ms
goal_500: 332 ms, 501 VCs by sorry: 289 ms, instantiate > 1000ms: 23363 ms, kernel: 770 ms
goal_1000: 794 ms, 1001 VCs by sorry: 1332 ms, instantiate > 1000ms: 334614 ms, kernel: 1882 ms
```
This commit is contained in:
Sebastian Graf 2026-02-18 13:03:47 +01:00 committed by GitHub
parent ad64f7c1ba
commit c0b9ff1148
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 50 additions and 35 deletions

View file

@ -20,6 +20,6 @@ def Goal (n : Nat) : Prop := ∀ s post, post () s → Exec s (loop n) post
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000
#eval runBenchUsingSym ``Goal [``loop, ``step] solve
[100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
#eval runBenchUsingSym ``Goal [``loop, ``step] (solve · *> return []) `(tactic| fail)
[100, 500, 1000]
-- [1000]

View file

@ -9,8 +9,15 @@ import Lean.Elab
open Lean Parser Meta Elab Tactic Sym
def timeItMs (k : MetaM α) : MetaM (α × UInt64) := do
let startTime ← IO.monoNanosNow
let a ← k
let endTime ← IO.monoNanosNow
let ms := (endTime - startTime).toFloat / 1000000.0
return (a, ms.toUInt64)
/-- Helper function for executing a tactic `k` for solving `$(goal) n`. -/
def driver (goal : Name) (unfold : List Name) (n : Nat) (check := true) (k : MVarId → MetaM Unit) : MetaM Unit := do
def driver (goal : Name) (unfold : List Name) (n : Nat) (discharge : MetaM (TSyntax `tactic)) (check := true) (k : MVarId → MetaM (List MVarId)) : MetaM Unit := do
let mvar ← mkFreshExprMVar (mkApp (mkConst goal) (mkNatLit n))
-- The following code uses `Sym.simp`, but it balloons in the kernel. TODO: Investigate with new semantic
-- foundations. Use regular simp for now.
@ -23,46 +30,54 @@ def driver (goal : Name) (unfold : List Name) (n : Nat) (check := true) (k : MVa
-- let unfold := Syntax.SepArray.ofElems (unfold.toArray |>.map (Lean.mkIdent ·))
let lemmas ← unfold.toArray |>.push goal |>.mapM fun n => `(simpLemma| $(Lean.mkIdent n):term)
let unfold := Syntax.TSepArray.ofElems (sep := ",") lemmas
let ([mvarId], _) ← Lean.Elab.runTactic mvar.mvarId! (← `(tactic| simp only [$unfold,*])).raw {} {}
| throwError "FAILED!"
let startTime ← IO.monoNanosNow
k mvarId
let endTime ← IO.monoNanosNow
let ms := (endTime - startTime).toFloat / 1000000.0
if check then
let startTime ← IO.monoNanosNow
checkWithKernel (← instantiateExprMVars mvar)
let endTime ← IO.monoNanosNow
let kernelMs := (endTime - startTime).toFloat / 1000000.0
IO.println s!"goal_{n}: {ms} ms, kernel: {kernelMs} ms"
let (mvarId, _unfoldMs) ← timeItMs do
let ([mvarId], _) ← Lean.Elab.runTactic mvar.mvarId! (← `(tactic| simp only [$unfold,*])).raw {} {}
| throwError "FAILED!"
return mvarId
-- IO.println s!"time spent unfolding: {_unfoldMs} ms"
let (mvarIds, ms) ← timeItMs do k mvarId
let discharge ← discharge
let dischargePp ← PrettyPrinter.ppTactic discharge
let dischargeMs? ← OptionT.run <| do
guard !mvarIds.isEmpty
Prod.snd <$> timeItMs do
for mvarId in mvarIds do
let ([], _) ← Lean.Elab.runTactic mvarId discharge.raw {} {}
| throwError "{dischargePp} failed to solve {mvarId}"
let (expr, instMs) ← timeItMs (instantiateMVars mvar)
let (_, kernelMs) ← timeItMs (checkWithKernel expr)
let mut msg := s!"goal_{n}: {ms} ms"
if let some dischargeMs := dischargeMs? then
msg := msg ++ s!", {mvarIds.length} VCs by {dischargePp}: {dischargeMs} ms"
else
IO.println s!"goal_{n}: {ms} ms"
msg := msg ++ s!", {mvarIds.length} VCs"
if instMs > 1000 then
msg := msg ++ s!", instantiate > 1000ms: {instMs} ms"
msg := msg ++ s!", kernel: {kernelMs} ms"
IO.println msg
def solveUsingTactic (goal : Name) (unfold : List Name) (n : Nat) (solve : MetaM (TSyntax `tactic)) (check := true) : MetaM Unit := do
driver goal unfold n check fun mvarId => do
let ([], _) ← Lean.Elab.runTactic mvarId (← solve).raw {} {} | throwError "FAILED!"
def solveUsingTactic (goal : Name) (unfold : List Name) (n : Nat) (solve : MetaM (TSyntax `tactic)) (discharge : MetaM (TSyntax `tactic)) (check := true) : MetaM Unit := do
driver goal unfold n discharge check fun mvarId => do
let (mvarIds, _) ← Lean.Elab.runTactic mvarId (← solve).raw {} {}
return mvarIds
/--
Solves a goal of the form `goal n` using the given tactic, where `n` ranges over `sizes`.
`unfold` is a list of `simp` lemmas to apply in order to unfold `goal n`.
For many benchmarks, this is `[step, loop]`.
-/
public def runBenchUsingTactic (goal : Name) (unfold : List Name) (solve : MetaM (TSyntax `tactic)) (sizes : List Nat) : MetaM Unit := do
IO.println "=== VCGen tests ==="
IO.println ""
public def runBenchUsingTactic (goal : Name) (unfold : List Name) (solve : MetaM (TSyntax `tactic)) (discharge : MetaM (TSyntax `tactic)) (sizes : List Nat) : MetaM Unit := do
for n in sizes do
solveUsingTactic goal unfold n solve
solveUsingTactic goal unfold n solve discharge
def solveUsingSym (goal : Name) (unfold : List Name) (n : Nat) (solve : MVarId → SymM Unit) (check := true) : MetaM Unit := do
driver goal unfold n check fun mvarId => SymM.run do solve mvarId
def solveUsingSym (goal : Name) (unfold : List Name) (n : Nat) (solve : MVarId → SymM (List MVarId)) (discharge : MetaM (TSyntax `tactic)) (check := true) : MetaM Unit := do
driver goal unfold n discharge check fun mvarId => SymM.run do solve mvarId
/--
Solves a goal of the form `goal n` using a `SymM` procedure, where `n` ranges over `sizes`.
`unfold` is a list of `simp` lemmas to apply in order to unfold `goal n`.
For many benchmarks, this is `[step, loop]`.
-/
public def runBenchUsingSym (goal : Name) (unfold : List Name) (solve : MVarId → SymM Unit) (sizes : List Nat) : MetaM Unit := do
IO.println "=== Symbolic Simulation Tests ==="
IO.println ""
public def runBenchUsingSym (goal : Name) (unfold : List Name) (solve : MVarId → SymM (List MVarId)) (discharge : MetaM (TSyntax `tactic)) (sizes : List Nat) : MetaM Unit := do
for n in sizes do
solveUsingSym goal unfold n solve
solveUsingSym goal unfold n solve discharge

View file

@ -36,6 +36,6 @@ def Goal (n : Nat) : Prop := ∀ post, ⦃post⦄ loop n ⦃⇓_ => post⦄
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen' <;> sorry)
[100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| grind)
[100, 500, 1000]
-- [1000]

View file

@ -57,6 +57,6 @@ example : Goal 20 := by
mvcgen' <;> grind
-/
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen' <;> sorry)
[100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| grind)
[100, 500, 1000]
-- [1000]

View file

@ -37,6 +37,6 @@ example : Goal 20 := by
all_goals sorry
-/
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen' <;> sorry)
[100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| sorry)
[100, 500, 1000]
-- [1000]