chore: speed up test suite slightly (#12969)

This PR speeds up some benchmarks when run as tests by lowering their workload. It also stops testing some of the more expensive benchmarks that can't be easily made smaller.
2026-03-20 13:24:32 +01:00 · 2026-03-20 13:24:32 +01:00 · 492fda3bca
commit 492fda3bca
parent 9676f54cc5
31 changed files with 122 additions and 65 deletions
--- a/.claude/CLAUDE.md
+++ b/.claude/CLAUDE.md
@ -16,19 +16,32 @@ See `tests/README.md` for full documentation. Quick reference:
 CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
 make -C build/release -j "$(nproc)" test

-# Specific test by name (supports regex via ctest -R)
+# Specific test by name (supports regex via ctest -R; double-quote special chars like |)
 CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
-make -C build/release -j "$(nproc)" test ARGS='-R grind_ematch'
+make -C build/release -j "$(nproc)" test ARGS="-R 'grind_ematch'"
+
+# Multiple tests matching a pattern
+CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
+make -C build/release -j "$(nproc)" test ARGS="-R 'treemap|phashmap'"

 # Rerun only previously failed tests
 CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
 make -C build/release -j "$(nproc)" test ARGS='--rerun-failed'

-# Single test from tests/foo/bar/ (quick check during development)
-CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
-make -C build/release -j "$(nproc)" test ARGS=-R testname'
+# Run a test manually without ctest (test pile: pass filename relative to the pile dir)
+tests/with_stage1_test_env.sh tests/elab_bench/run_bench.sh cbv_decide.lean
+tests/with_stage1_test_env.sh tests/elab/run_test.sh grind_indexmap.lean
 ```

+## Benchmark vs Test Problem Sizes
+
+Benchmarks are also run as tests. Use the `TEST_BENCH` environment variable (unset in tests, set to `1` in benchmarks) to scale problem sizes:
+
+- In `compile_bench` `.init.sh` files: check `$TEST_BENCH` and set `TEST_ARGS` accordingly
+- In `elab_bench` Lean files: use `(← IO.getEnv "TEST_BENCH") == some "1"` to switch between small (test) and large (bench) inputs
+
+See `tests/README.md` for the full benchmark writing guide.
+
 ## Testing stage 2

 When requested to test stage 2, build it as follows:
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -41,7 +41,6 @@ string(APPEND TEST_VARS " CXX='${CMAKE_CXX_COMPILER} ${LEANC_OPTS}'")
 set(WITH_TEST_ENV "${CMAKE_CURRENT_SOURCE_DIR}/with_stage${STAGE}_test_env.sh")
 set(WITH_BENCH_ENV "${CMAKE_CURRENT_SOURCE_DIR}/with_stage${STAGE}_bench_env.sh")

-string(APPEND TEST_VARS " TEST_BENCH=")
 configure_file(with_env.sh.in "${WITH_TEST_ENV}")

 block()
--- a/tests/README.md
+++ b/tests/README.md
@ -70,15 +70,17 @@ CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
 make -C build/release -j "$(nproc)" test ARGS="--rerun-failed"
 ```

-Run an individual test using one of these commands:
+Run an individual test using one of these commands.
+Note that regex arguments to `-R` need to be double-quoted
+if they contain any special shell characters like `|`.

 ```sh
 CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
-make -C build/release -j "$(nproc)" test ARGS="-R <regex>"
+make -C build/release -j "$(nproc)" test ARGS="-R '<regex>'"

 # For a specific stage
 CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
-make -C build/release/stage1 -j "$(nproc)" test ARGS="-R <regex>"
+make -C build/release/stage1 -j "$(nproc)" test ARGS="-R '<regex>'"

 # Manually, without ctest
 tests/with_stage1_test_env.sh path/to/test/directory/run_test.sh
@ -103,7 +105,8 @@ make -C build/release -j "$(nproc)" bench-part2 # produces tests/part2.measureme

 Make sure not to specify `-j "$(nproc)"` when running the bench suite manually inside `build/release/stage<n>`.

-Run an individual benchmark manually using
+Run an individual benchmark manually using one of these commands.
+Note that the `run_bench.sh` arguments are relative to the test directory, not the current working directory.

 ```sh
 tests/with_stage1_bench_env.sh path/to/test/directory/run_bench.sh
@ -144,7 +147,11 @@ When run as test, the problem instance should be as small as possible, but large

 The main mechanism to scale problem instances is the `TEST_BENCH` environment variable.
 It is unset in tests and set to `1` in benchmarks.
-Inside your benchmark, check whether the variable exists and adjust the problem size accordingly.
+Inside your benchmark, check whether the variable exists and adjust the problem size accordingly:
+
+```lean
+let bench := (← IO.getEnv "TEST_BENCH") == some "1"
+```

 Inside the `compile_bench` directory, there is a second mechanism:
 Using a `.init.sh` file to pass command line arguments to your test.
--- a/tests/compile_bench/channel.lean
+++ b/tests/compile_bench/channel.lean
@ -21,9 +21,6 @@ Note that we will stick exclusively to the sync interface for this as there is n
 reaped from async in this benchmark so we might as well just block.
 -/

-def MESSAGES : Nat := 100_000
-def THREADS : Nat := 4
-
 def seq (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
  for i in *...amount do
    ch.send i
@ -43,11 +40,11 @@ def spsc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
  IO.ofExcept (← IO.wait t1)
  IO.wait t2

-def mpsc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
-  let mut producers := Array.emptyWithCapacity THREADS
-  for _ in *...THREADS do
+def mpsc (threads : Nat) (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
+  let mut producers := Array.emptyWithCapacity threads
+  for _ in *...threads do
    let t ← IO.asTask (prio := .dedicated) do
-      for i in *...(amount/THREADS) do
+      for i in *...(amount/threads) do
        ch.send i
    producers := producers.push t

@ -59,16 +56,16 @@ def mpsc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
  for producer in producers do
    (IO.ofExcept (← IO.wait producer))

-def mpmc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
-  let mut producers := Array.emptyWithCapacity THREADS
-  for _ in *...THREADS do
+def mpmc (threads : Nat) (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
+  let mut producers := Array.emptyWithCapacity threads
+  for _ in *...threads do
    let t ← IO.asTask (prio := .dedicated) do
-      for i in *...(amount/THREADS) do
+      for i in *...(amount/threads) do
        ch.send i
    producers := producers.push t

-  let mut consumers := Array.emptyWithCapacity THREADS
-  for _ in *...THREADS do
+  let mut consumers := Array.emptyWithCapacity threads
+  for _ in *...threads do
    let t ← IO.asTask (prio := .dedicated) do
      while true do
        if let some _ ← ch.recv then
@ -87,31 +84,33 @@ def mpmc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do

  return ()

-def run (name : String) (cap : Option Nat) (bench : Std.CloseableChannel.Sync Nat → Nat → IO Unit) :
-    IO Unit := do
+def run (name : String) (cap : Option Nat) (messages : Nat)
+    (bench : Std.CloseableChannel.Sync Nat → Nat → IO Unit) : IO Unit := do
  let ch ← Std.CloseableChannel.new cap
  let t1 ← IO.monoMsNow
-  bench ch.sync MESSAGES
+  bench ch.sync messages
  let t2 ← IO.monoMsNow
  let time : Float := (t2 - t1).toFloat / 1000.0
  IO.println s!"measurement: {name} {time} s"


-def main : IO Unit := do
-  run "bounded0_spsc" (some 0) spsc
-  run "bounded0_mpsc" (some 0) mpsc
-  run "bounded0_mpmc" (some 0) mpmc
+def main (args : List String) : IO Unit := do
+  let threads := args[0]!.toNat!
+  let messages := args[1]!.toNat!
+  run "bounded0_spsc" (some 0) messages spsc
+  run "bounded0_mpsc" (some 0) messages (mpsc threads)
+  run "bounded0_mpmc" (some 0) messages (mpmc threads)

-  run "bounded1_spsc" (some 1) spsc
-  run "bounded1_mpsc" (some 1) mpsc
-  run "bounded1_mpmc" (some 1) mpmc
+  run "bounded1_spsc" (some 1) messages spsc
+  run "bounded1_mpsc" (some 1) messages (mpsc threads)
+  run "bounded1_mpmc" (some 1) messages (mpmc threads)

-  run "boundedn_spsc" (some MESSAGES) spsc
-  run "boundedn_mpsc" (some MESSAGES) mpsc
-  run "boundedn_mpmc" (some MESSAGES) mpmc
-  run "boundedn_seq" (some MESSAGES) seq
+  run "boundedn_spsc" (some messages) messages spsc
+  run "boundedn_mpsc" (some messages) messages (mpsc threads)
+  run "boundedn_mpmc" (some messages) messages (mpmc threads)
+  run "boundedn_seq" (some messages) messages seq

-  run "unbounded_spsc" none spsc
-  run "unbounded_mpsc" none mpsc
-  run "unbounded_mpmc" none mpmc
-  run "unbounded_seq" none seq
+  run "unbounded_spsc" none messages spsc
+  run "unbounded_mpsc" none messages (mpsc threads)
+  run "unbounded_mpmc" none messages (mpmc threads)
+  run "unbounded_seq" none messages seq
--- a/tests/compile_bench/channel.lean.init.sh
+++ b/tests/compile_bench/channel.lean.init.sh
@ -0,0 +1,5 @@
+TEST_ARGS=( 4 1000 )
+
+if [[ -n $TEST_BENCH ]]; then
+  TEST_ARGS=( 4 100000 )
+fi
--- a/tests/compile_bench/deriv.lean.init.sh
+++ b/tests/compile_bench/deriv.lean.init.sh
@ -1,4 +1,4 @@
-TEST_ARGS=( 9 )
+TEST_ARGS=( 7 )

 if [[ -n $TEST_BENCH ]]; then
  TEST_ARGS=( 10 )
--- a/tests/compile_bench/deriv.lean.out.expected
+++ b/tests/compile_bench/deriv.lean.out.expected
@ -5,5 +5,3 @@
 5 count: 2202
 6 count: 12886
 7 count: 83648
-8 count: 598592
-9 count: 4697200
--- a/tests/compile_bench/hashmap.lean.init.sh
+++ b/tests/compile_bench/hashmap.lean.init.sh
@ -1 +1,5 @@
-TEST_ARGS=( 11 10000 )
+TEST_ARGS=( 11 100 )
+
+if [[ -n $TEST_BENCH ]]; then
+  TEST_ARGS=( 11 10000 )
+fi
--- a/tests/compile_bench/ilean_roundtrip.lean.init.sh
+++ b/tests/compile_bench/ilean_roundtrip.lean.init.sh
@ -1 +1,5 @@
-TEST_ARGS=( 200000 )
+TEST_ARGS=( 1000 )
+
+if [[ -n $TEST_BENCH ]]; then
+  TEST_ARGS=( 200000 )
+fi
--- a/tests/compile_bench/nat_repr.lean.init.sh
+++ b/tests/compile_bench/nat_repr.lean.init.sh
@ -1 +1,5 @@
-TEST_ARGS=( 5000 )
+TEST_ARGS=( 500 )
+
+if [[ -n $TEST_BENCH ]]; then
+  TEST_ARGS=( 5000 )
+fi
--- a/tests/compile_bench/nat_repr.lean.out.expected
+++ b/tests/compile_bench/nat_repr.lean.out.expected
@ -1 +1 @@
-44945605
+324355
--- a/tests/compile_bench/phashmap.lean.init.sh
+++ b/tests/compile_bench/phashmap.lean.init.sh
@ -1 +1,5 @@
-TEST_ARGS=( 11 10000 )
+TEST_ARGS=( 11 100 )
+
+if [[ -n $TEST_BENCH ]]; then
+  TEST_ARGS=( 11 10000 )
+fi
--- a/tests/compile_bench/sigmaIterator.lean
+++ b/tests/compile_bench/sigmaIterator.lean
@ -164,10 +164,10 @@ def f._at_.g.spec_0._redArg _x.1 _x.2 _x.3 it : Nat :=
 ...
 ```
 -/
-def g : Nat := Id.run do
-  (*...30000000).iter.filter (fun _ => True)
+def g (n : Nat) : Nat := Id.run do
+  (*...n).iter.filter (fun _ => True)
    |>.sigma (α := fun _ => _) (param := 0)
    |> f (acc := 0)

-def main : IO Unit :=
-  IO.println g
+def main (args : List String) : IO Unit :=
+  IO.println (g args[0]!.toNat!)
--- a/tests/compile_bench/sigmaIterator.lean.init.sh
+++ b/tests/compile_bench/sigmaIterator.lean.init.sh
@ -0,0 +1,5 @@
+TEST_ARGS=( 100 )
+
+if [[ -n $TEST_BENCH ]]; then
+  TEST_ARGS=( 30000000 )
+fi
--- a/tests/compile_bench/sigmaIterator.lean.out.expected
+++ b/tests/compile_bench/sigmaIterator.lean.out.expected
@ -1 +1 @@
-449999985000000
+4950
--- a/tests/compile_bench/treemap.lean.init.sh
+++ b/tests/compile_bench/treemap.lean.init.sh
@ -1 +1,5 @@
-TEST_ARGS=( 11 10000 )
+TEST_ARGS=( 11 100 )
+
+if [[ -n $TEST_BENCH ]]; then
+  TEST_ARGS=( 11 10000 )
+fi
--- a/tests/elab_bench/bv_decide_inequality.lean.no_test
+++ b/tests/elab_bench/bv_decide_inequality.lean.no_test
--- a/tests/elab_bench/bv_decide_mod.lean.no_test
+++ b/tests/elab_bench/bv_decide_mod.lean.no_test
--- a/tests/elab_bench/cbv_arm_ldst.lean.no_test
+++ b/tests/elab_bench/cbv_arm_ldst.lean.no_test
--- a/tests/elab_bench/cbv_decide.lean
+++ b/tests/elab_bench/cbv_decide.lean
@ -35,7 +35,9 @@ set_option maxHeartbeats 400000
 def runCbvTests : MetaM Unit := do
  IO.println "=== Call-By-Value Tactic Tests ==="
  IO.println ""
-  for n in [6,10,12,14,15,18,20] do
+  let bench := (← IO.getEnv "TEST_BENCH") == some "1"
+  let ns := if bench then [6,10,12,14,15,18,20] else [6]
+  for n in ns do
    runSingleTest n

 #eval runCbvTests
--- a/tests/elab_bench/cbv_divisors.lean
+++ b/tests/elab_bench/cbv_divisors.lean
@ -24,7 +24,8 @@ def runProblem (n : Nat) : MetaM Unit := do
 def runCbvTests : MetaM Unit := do
  IO.println "=== Call-By-Value Tactic Tests ==="
  IO.println ""
-  for n in List.range 200 do
+  let bench := (← IO.getEnv "TEST_BENCH") == some "1"
+  for n in List.range (if bench then 200 else 10) do
    runProblem n

 #eval runCbvTests
--- a/tests/elab_bench/cbv_leroy.lean
+++ b/tests/elab_bench/cbv_leroy.lean
@ -183,7 +183,9 @@ set_option maxHeartbeats 400000
 def runCbvTests : MetaM Unit := do
  IO.println "=== Call-By-Value Tactic Tests ==="
  IO.println ""
-  for n in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000] do
+  let bench := (← IO.getEnv "TEST_BENCH") == some "1"
+  let ns := if bench then [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000] else [10]
+  for n in ns do
    runSingleTest n

 def runDecideTests : MetaM Unit := do
--- a/tests/elab_bench/cbv_merge_sort.lean
+++ b/tests/elab_bench/cbv_merge_sort.lean
@ -46,7 +46,9 @@ def runProblem (n : Nat) : MetaM Unit := do
 def runBenchmarks : MetaM Unit := do
  IO.println "=== Merge Sort CBV Benchmarks ==="
  IO.println ""
-  for n in [10, 25, 50, 75, 100, 125, 175] do
+  let bench := (← IO.getEnv "TEST_BENCH") == some "1"
+  let sizes := if bench then [10, 25, 50, 75, 100, 125, 175] else [10]
+  for n in sizes do
    runProblem n

 #eval runBenchmarks
--- a/tests/elab_bench/cbv_system_f.lean.no_test
+++ b/tests/elab_bench/cbv_system_f.lean.no_test
--- a/tests/elab_bench/charactersIn.lean
+++ b/tests/elab_bench/charactersIn.lean
@ -9,8 +9,10 @@ def bench (pattern : String) : TermElabM Unit := do
  let env ← getEnv
  -- IO.println s!"{env.constants.size} decls"
  let list := env.constants.toList |>.map fun (n, _) => n.toString
-  IO.println s!"Matching {list.length * TURNS} decls"
-  for _ in 0...TURNS do
+  let bench := (← IO.getEnv "TEST_BENCH") == some "1"
+  let turns := if bench then TURNS else 1
+  IO.println s!"Matching {list.length * turns} decls"
+  for _ in 0...turns do
    let mut n := 0
    for name in list do
      if pattern.charactersIn name then n := n + 1
--- a/tests/elab_bench/grind_bitvec2.lean.no_test
+++ b/tests/elab_bench/grind_bitvec2.lean.no_test
--- a/tests/elab_bench/grind_bitvec2.lean.out.expected
+++ b/tests/elab_bench/grind_bitvec2.lean.out.expected
@ -1 +0,0 @@
-grind_bitvec2.lean:291:8-291:27: warning: declaration uses `sorry`
--- a/tests/elab_bench/riscv-ast.lean.no_test
+++ b/tests/elab_bench/riscv-ast.lean.no_test
--- a/tests/elab_bench/run_bench.sh
+++ b/tests/elab_bench/run_bench.sh
@ -11,7 +11,6 @@ capture_only "$1" \
 normalize_mvar_suffixes
 normalize_reference_urls
 extract_measurements "$TOPIC"
-check_out_file
 check_exit_is_success

 run_after "$1"
--- a/tests/measure.py
+++ b/tests/measure.py
@ -114,7 +114,10 @@ def measure_perf(cmd: list[str], events: set[str], capture: bool) -> MeasureResu
        # Execute command
        result = subprocess.run(cmd, env=env, capture_output=capture, encoding="utf-8")
        if result.returncode != 0:
-            sys.exit(result.returncode)
+            if capture:
+                print(result.stdout, end="", file=sys.stdout)
+                print(result.stderr, end="", file=sys.stderr)
+            raise SystemExit(result.returncode)

        # Collect results
        perf: PerfResults = {}
--- a/tests/util.sh
+++ b/tests/util.sh
@ -92,6 +92,7 @@ function capture_only {
  # since $TMP_DIR is already specific to this test run.
  CAPTURED="${1:-"$TMP_DIR/tmp"}"; shift
  EXIT=0; (set -x; "${@}" > "$CAPTURED.out.produced" 2>&1) || EXIT="$?"
+  cat "$CAPTURED.out.produced"
 }

 # Run a command, capturing its output and failing if the command fails.
				`@ -1 +0,0 @@`
				grind_bitvec2.lean:291:8-291:27: warning: declaration uses `sorry`