chore: speed up test suite slightly (#12969)

This PR speeds up some benchmarks when run as tests by lowering their
workload. It also stops testing some of the more expensive benchmarks
that can't be easily made smaller.
This commit is contained in:
Garmelon 2026-03-20 13:24:32 +01:00 committed by GitHub
parent 9676f54cc5
commit 492fda3bca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 122 additions and 65 deletions

View file

@ -16,19 +16,32 @@ See `tests/README.md` for full documentation. Quick reference:
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test
# Specific test by name (supports regex via ctest -R)
# Specific test by name (supports regex via ctest -R; double-quote special chars like |)
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS='-R grind_ematch'
make -C build/release -j "$(nproc)" test ARGS="-R 'grind_ematch'"
# Multiple tests matching a pattern
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS="-R 'treemap|phashmap'"
# Rerun only previously failed tests
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS='--rerun-failed'
# Single test from tests/foo/bar/ (quick check during development)
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS=-R testname'
# Run a test manually without ctest (test pile: pass filename relative to the pile dir)
tests/with_stage1_test_env.sh tests/elab_bench/run_bench.sh cbv_decide.lean
tests/with_stage1_test_env.sh tests/elab/run_test.sh grind_indexmap.lean
```
## Benchmark vs Test Problem Sizes
Benchmarks are also run as tests. Use the `TEST_BENCH` environment variable (unset in tests, set to `1` in benchmarks) to scale problem sizes:
- In `compile_bench` `.init.sh` files: check `$TEST_BENCH` and set `TEST_ARGS` accordingly
- In `elab_bench` Lean files: use `(← IO.getEnv "TEST_BENCH") == some "1"` to switch between small (test) and large (bench) inputs
See `tests/README.md` for the full benchmark writing guide.
## Testing stage 2
When requested to test stage 2, build it as follows:

View file

@ -41,7 +41,6 @@ string(APPEND TEST_VARS " CXX='${CMAKE_CXX_COMPILER} ${LEANC_OPTS}'")
set(WITH_TEST_ENV "${CMAKE_CURRENT_SOURCE_DIR}/with_stage${STAGE}_test_env.sh")
set(WITH_BENCH_ENV "${CMAKE_CURRENT_SOURCE_DIR}/with_stage${STAGE}_bench_env.sh")
string(APPEND TEST_VARS " TEST_BENCH=")
configure_file(with_env.sh.in "${WITH_TEST_ENV}")
block()

View file

@ -70,15 +70,17 @@ CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS="--rerun-failed"
```
Run an individual test using one of these commands:
Run an individual test using one of these commands.
Note that regex arguments to `-R` need to be double-quoted
if they contain any special shell characters like `|`.
```sh
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS="-R <regex>"
make -C build/release -j "$(nproc)" test ARGS="-R '<regex>'"
# For a specific stage
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release/stage1 -j "$(nproc)" test ARGS="-R <regex>"
make -C build/release/stage1 -j "$(nproc)" test ARGS="-R '<regex>'"
# Manually, without ctest
tests/with_stage1_test_env.sh path/to/test/directory/run_test.sh
@ -103,7 +105,8 @@ make -C build/release -j "$(nproc)" bench-part2 # produces tests/part2.measureme
Make sure not to specify `-j "$(nproc)"` when running the bench suite manually inside `build/release/stage<n>`.
Run an individual benchmark manually using
Run an individual benchmark manually using one of these commands.
Note that the `run_bench.sh` arguments are relative to the test directory, not the current working directory.
```sh
tests/with_stage1_bench_env.sh path/to/test/directory/run_bench.sh
@ -144,7 +147,11 @@ When run as test, the problem instance should be as small as possible, but large
The main mechanism to scale problem instances is the `TEST_BENCH` environment variable.
It is unset in tests and set to `1` in benchmarks.
Inside your benchmark, check whether the variable exists and adjust the problem size accordingly.
Inside your benchmark, check whether the variable exists and adjust the problem size accordingly:
```lean
let bench := (← IO.getEnv "TEST_BENCH") == some "1"
```
Inside the `compile_bench` directory, there is a second mechanism:
Using a `.init.sh` file to pass command line arguments to your test.

View file

@ -21,9 +21,6 @@ Note that we will stick exclusively to the sync interface for this as there is n
reaped from async in this benchmark so we might as well just block.
-/
def MESSAGES : Nat := 100_000
def THREADS : Nat := 4
def seq (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
for i in *...amount do
ch.send i
@ -43,11 +40,11 @@ def spsc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
IO.ofExcept (← IO.wait t1)
IO.wait t2
def mpsc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
let mut producers := Array.emptyWithCapacity THREADS
for _ in *...THREADS do
def mpsc (threads : Nat) (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
let mut producers := Array.emptyWithCapacity threads
for _ in *...threads do
let t ← IO.asTask (prio := .dedicated) do
for i in *...(amount/THREADS) do
for i in *...(amount/threads) do
ch.send i
producers := producers.push t
@ -59,16 +56,16 @@ def mpsc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
for producer in producers do
(IO.ofExcept (← IO.wait producer))
def mpmc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
let mut producers := Array.emptyWithCapacity THREADS
for _ in *...THREADS do
def mpmc (threads : Nat) (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
let mut producers := Array.emptyWithCapacity threads
for _ in *...threads do
let t ← IO.asTask (prio := .dedicated) do
for i in *...(amount/THREADS) do
for i in *...(amount/threads) do
ch.send i
producers := producers.push t
let mut consumers := Array.emptyWithCapacity THREADS
for _ in *...THREADS do
let mut consumers := Array.emptyWithCapacity threads
for _ in *...threads do
let t ← IO.asTask (prio := .dedicated) do
while true do
if let some _ ← ch.recv then
@ -87,31 +84,33 @@ def mpmc (ch : Std.CloseableChannel.Sync Nat) (amount : Nat) : IO Unit := do
return ()
def run (name : String) (cap : Option Nat) (bench : Std.CloseableChannel.Sync Nat → Nat → IO Unit) :
IO Unit := do
def run (name : String) (cap : Option Nat) (messages : Nat)
(bench : Std.CloseableChannel.Sync Nat → Nat → IO Unit) : IO Unit := do
let ch ← Std.CloseableChannel.new cap
let t1 ← IO.monoMsNow
bench ch.sync MESSAGES
bench ch.sync messages
let t2 ← IO.monoMsNow
let time : Float := (t2 - t1).toFloat / 1000.0
IO.println s!"measurement: {name} {time} s"
def main : IO Unit := do
run "bounded0_spsc" (some 0) spsc
run "bounded0_mpsc" (some 0) mpsc
run "bounded0_mpmc" (some 0) mpmc
def main (args : List String) : IO Unit := do
let threads := args[0]!.toNat!
let messages := args[1]!.toNat!
run "bounded0_spsc" (some 0) messages spsc
run "bounded0_mpsc" (some 0) messages (mpsc threads)
run "bounded0_mpmc" (some 0) messages (mpmc threads)
run "bounded1_spsc" (some 1) spsc
run "bounded1_mpsc" (some 1) mpsc
run "bounded1_mpmc" (some 1) mpmc
run "bounded1_spsc" (some 1) messages spsc
run "bounded1_mpsc" (some 1) messages (mpsc threads)
run "bounded1_mpmc" (some 1) messages (mpmc threads)
run "boundedn_spsc" (some MESSAGES) spsc
run "boundedn_mpsc" (some MESSAGES) mpsc
run "boundedn_mpmc" (some MESSAGES) mpmc
run "boundedn_seq" (some MESSAGES) seq
run "boundedn_spsc" (some messages) messages spsc
run "boundedn_mpsc" (some messages) messages (mpsc threads)
run "boundedn_mpmc" (some messages) messages (mpmc threads)
run "boundedn_seq" (some messages) messages seq
run "unbounded_spsc" none spsc
run "unbounded_mpsc" none mpsc
run "unbounded_mpmc" none mpmc
run "unbounded_seq" none seq
run "unbounded_spsc" none messages spsc
run "unbounded_mpsc" none messages (mpsc threads)
run "unbounded_mpmc" none messages (mpmc threads)
run "unbounded_seq" none messages seq

View file

@ -0,0 +1,5 @@
TEST_ARGS=( 4 1000 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 4 100000 )
fi

View file

@ -1,4 +1,4 @@
TEST_ARGS=( 9 )
TEST_ARGS=( 7 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 10 )

View file

@ -5,5 +5,3 @@
5 count: 2202
6 count: 12886
7 count: 83648
8 count: 598592
9 count: 4697200

View file

@ -1 +1,5 @@
TEST_ARGS=( 11 10000 )
TEST_ARGS=( 11 100 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 11 10000 )
fi

View file

@ -1 +1,5 @@
TEST_ARGS=( 200000 )
TEST_ARGS=( 1000 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 200000 )
fi

View file

@ -1 +1,5 @@
TEST_ARGS=( 5000 )
TEST_ARGS=( 500 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 5000 )
fi

View file

@ -1 +1 @@
44945605
324355

View file

@ -1 +1,5 @@
TEST_ARGS=( 11 10000 )
TEST_ARGS=( 11 100 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 11 10000 )
fi

View file

@ -164,10 +164,10 @@ def f._at_.g.spec_0._redArg _x.1 _x.2 _x.3 it : Nat :=
...
```
-/
def g : Nat := Id.run do
(*...30000000).iter.filter (fun _ => True)
def g (n : Nat) : Nat := Id.run do
(*...n).iter.filter (fun _ => True)
|>.sigma (α := fun _ => _) (param := 0)
|> f (acc := 0)
def main : IO Unit :=
IO.println g
def main (args : List String) : IO Unit :=
IO.println (g args[0]!.toNat!)

View file

@ -0,0 +1,5 @@
TEST_ARGS=( 100 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 30000000 )
fi

View file

@ -1 +1 @@
449999985000000
4950

View file

@ -1 +1,5 @@
TEST_ARGS=( 11 10000 )
TEST_ARGS=( 11 100 )
if [[ -n $TEST_BENCH ]]; then
TEST_ARGS=( 11 10000 )
fi

View file

@ -35,7 +35,9 @@ set_option maxHeartbeats 400000
def runCbvTests : MetaM Unit := do
IO.println "=== Call-By-Value Tactic Tests ==="
IO.println ""
for n in [6,10,12,14,15,18,20] do
let bench := (← IO.getEnv "TEST_BENCH") == some "1"
let ns := if bench then [6,10,12,14,15,18,20] else [6]
for n in ns do
runSingleTest n
#eval runCbvTests

View file

@ -24,7 +24,8 @@ def runProblem (n : Nat) : MetaM Unit := do
def runCbvTests : MetaM Unit := do
IO.println "=== Call-By-Value Tactic Tests ==="
IO.println ""
for n in List.range 200 do
let bench := (← IO.getEnv "TEST_BENCH") == some "1"
for n in List.range (if bench then 200 else 10) do
runProblem n
#eval runCbvTests

View file

@ -183,7 +183,9 @@ set_option maxHeartbeats 400000
def runCbvTests : MetaM Unit := do
IO.println "=== Call-By-Value Tactic Tests ==="
IO.println ""
for n in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000] do
let bench := (← IO.getEnv "TEST_BENCH") == some "1"
let ns := if bench then [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000] else [10]
for n in ns do
runSingleTest n
def runDecideTests : MetaM Unit := do

View file

@ -46,7 +46,9 @@ def runProblem (n : Nat) : MetaM Unit := do
def runBenchmarks : MetaM Unit := do
IO.println "=== Merge Sort CBV Benchmarks ==="
IO.println ""
for n in [10, 25, 50, 75, 100, 125, 175] do
let bench := (← IO.getEnv "TEST_BENCH") == some "1"
let sizes := if bench then [10, 25, 50, 75, 100, 125, 175] else [10]
for n in sizes do
runProblem n
#eval runBenchmarks

View file

@ -9,8 +9,10 @@ def bench (pattern : String) : TermElabM Unit := do
let env ← getEnv
-- IO.println s!"{env.constants.size} decls"
let list := env.constants.toList |>.map fun (n, _) => n.toString
IO.println s!"Matching {list.length * TURNS} decls"
for _ in 0...TURNS do
let bench := (← IO.getEnv "TEST_BENCH") == some "1"
let turns := if bench then TURNS else 1
IO.println s!"Matching {list.length * turns} decls"
for _ in 0...turns do
let mut n := 0
for name in list do
if pattern.charactersIn name then n := n + 1

View file

@ -1 +0,0 @@
grind_bitvec2.lean:291:8-291:27: warning: declaration uses `sorry`

View file

View file

@ -11,7 +11,6 @@ capture_only "$1" \
normalize_mvar_suffixes
normalize_reference_urls
extract_measurements "$TOPIC"
check_out_file
check_exit_is_success
run_after "$1"

View file

@ -114,7 +114,10 @@ def measure_perf(cmd: list[str], events: set[str], capture: bool) -> MeasureResu
# Execute command
result = subprocess.run(cmd, env=env, capture_output=capture, encoding="utf-8")
if result.returncode != 0:
sys.exit(result.returncode)
if capture:
print(result.stdout, end="", file=sys.stdout)
print(result.stderr, end="", file=sys.stderr)
raise SystemExit(result.returncode)
# Collect results
perf: PerfResults = {}

View file

@ -92,6 +92,7 @@ function capture_only {
# since $TMP_DIR is already specific to this test run.
CAPTURED="${1:-"$TMP_DIR/tmp"}"; shift
EXIT=0; (set -x; "${@}" > "$CAPTURED.out.produced" 2>&1) || EXIT="$?"
cat "$CAPTURED.out.produced"
}
# Run a command, capturing its output and failing if the command fails.