test: resurrect try cancellation tests (#13707)

This PR resurrects the tests about cancellation of `try?` tactics.

They were flaky before. By introducing an option
(`debug.tactic.try.onlyUserSuggestions`) to make these tests not run
`exact?` as part of `try?`, and thus be generally faster and use less
memory, the flakiness seems to have disappeared, at least in
non-`master` testing. We’ll see.

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joachim Breitner 2026-05-13 15:54:48 +02:00 committed by GitHub
parent 793cd14b38
commit 6279ae98c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 205 additions and 12 deletions

View file

@ -21,6 +21,15 @@ open Meta
A **very** simple `try?` tactic implementation.
-/
register_builtin_option debug.tactic.try.onlyUserSuggestions : Bool := {
defValue := false
descr := "if set, `try?` skips its built-in suggestion branches \
(simple, simp, grind, simp_all, induction/fun_induction, exact?) and only \
runs tactics produced by user-registered `@[try_suggestion]` generators. \
Primarily intended for tests that want to exercise the `try?` machinery \
without paying for library search and other expensive default branches."
}
declare_config_elab elabTryConfig Try.Config
namespace Try
@ -966,18 +975,7 @@ private def mkAllIndStx (info : Try.Info) (cont : TSyntax `tactic) : MetaM (TSyn
/-- Returns tactic for `evalAndSuggest` (unsafe version that can evaluate user generators) -/
private unsafe def mkTryEvalSuggestStxUnsafe (goal : MVarId) (info : Try.Info) : MetaM (TSyntax `tactic) := do
let simple ← mkSimpleTacStx
let simp ← mkSimpStx
let grind ← mkGrindStx info
let atomic ← `(tactic| attempt_all_par | $simple:tactic | $simp:tactic | $grind:tactic | simp_all)
let atomicSuggestions ← mkAtomicWithSuggestionsStx
let atomicOrSuggestions ← `(tactic| first | $atomic:tactic | $atomicSuggestions:tactic)
let funInds ← mkAllFunIndStx info atomicOrSuggestions
let inds ← mkAllIndStx info atomicOrSuggestions
let extra ← `(tactic| (intros; first | $simple:tactic | $simp:tactic | exact?))
-- Collect user-defined suggestions (runs after built-in tactics)
-- Collect user-defined suggestions.
let userEntries := trySuggestionExtension.getState (← getEnv)
let mut userTactics := #[]
for entry in userEntries do
@ -991,6 +989,25 @@ private unsafe def mkTryEvalSuggestStxUnsafe (goal : MVarId) (info : Try.Info) :
catch e =>
logWarning m!"try_suggestion generator {entry.name} failed: {e.toMessageData}"
if debug.tactic.try.onlyUserSuggestions.get (← getOptions) then
-- Bypass built-in branches entirely; only run user-registered tactics.
if userTactics.isEmpty then
return ← `(tactic|
fail "debug.tactic.try.onlyUserSuggestions is set but no user suggestions were produced")
else
return ← `(tactic| attempt_all_par $[| $userTactics:tactic]*)
let simple ← mkSimpleTacStx
let simp ← mkSimpStx
let grind ← mkGrindStx info
let atomic ← `(tactic| attempt_all_par | $simple:tactic | $simp:tactic | $grind:tactic | simp_all)
let atomicSuggestions ← mkAtomicWithSuggestionsStx
let atomicOrSuggestions ← `(tactic| first | $atomic:tactic | $atomicSuggestions:tactic)
let funInds ← mkAllFunIndStx info atomicOrSuggestions
let inds ← mkAllIndStx info atomicOrSuggestions
let extra ← `(tactic| (intros; first | $simple:tactic | $simp:tactic | exact?))
-- Build final tactic: built-ins first, then user suggestions as fallback
if userTactics.isEmpty then
`(tactic| first | $atomic:tactic | $atomicSuggestions:tactic | $funInds:tactic | $inds:tactic | $extra:tactic)

View file

@ -0,0 +1,87 @@
import Lean.Server.Test.Cancel
import Lean.Elab.Tactic.Try
open Lean Lean.Meta Lean.Elab Lean.Elab.Term Lean.Elab.Tactic
open Lean.Server.Test.Cancel
-- Diagnostic anchor: synchronous, fires exactly once at command-elab time
-- before any async task is spawned by the rest of the file. If a CI / stress
-- failure shows this line in `.out.produced`, the file worker at least got
-- past imports.
#eval (IO.eprintln "test: imports done" : IO Unit)
/-!
Test that `cancelRec` reaches the snapshot task spawned by
`elabEmptyByAsTry` on re-elaboration.
Chronological flow:
1. Empty-`by` example elaborates; `elabEmptyByAsTry` spawns a snapshot
task with its own cancel token and returns.
2. The snapshot task's `try?` calls `tracerSuggestion`, which:
- registers test task `"cancelTokenSet"`,
- registers `cancelTk.onSet` to resolve it,
- resolves sync promise `"tracerSuggestion"`,
- returns `wait_for_test_task "cancelTokenSet"` as the candidate.
3. `try?` evaluates the candidate; it blocks on the test task.
4. `t1` elaborates: `wait_for_sync "tracerSuggestion"` returns
immediately, `trace "blocked"` emits the diagnostic.
5. Runner inserts `; skip`, triggers re-elab; `cancelRec` sets the
cancel token; `onSet` resolves the test task; the candidate's wait
returns; the snapshot task body completes.
Failure modes:
- `cancelTk? := none` to `Core.logSnapshotTask`: `cancelRec` cannot
reach the cancel token, the wait blocks, runner times out.
- `cancelTk? := none` to `wrapAsyncAsSnapshot`: `tracerSuggestion`
sees no cancel token, doesn't register `onSet`; the promise drops;
`wait_for_test_task` surfaces a `task dropped` diagnostic.
Ordering note: the empty-`by` example must precede `t1` because `try?`
inside the snapshot task library-searches the environment in a way
that synchronously waits on prior pending async theorem bodies. With
`t1` first, its `wait_for_sync` would block the snapshot's `try?`,
deadlocking. Likely a separate upstream issue.
-/
namespace TestEmptyBy
opaque UnsolvableProp : Prop
@[try_suggestion]
def tracerSuggestion (_goal : MVarId) (_info : Try.Info) :
MetaM (Array (TSyntax `tactic)) := do
-- Keep dbg_trace output here minimal and gated to the first invocation:
-- this function runs in the snapshot task's stderr buffer, which interleaves
-- non-deterministically with the main elab thread's output. Every additional
-- trace point adds a potential race in `.out.expected`.
if let some prom ← mkTestTask "cancelTokenSet" then
if let some cancelTk := (← readThe Core.Context).cancelTk? then
cancelTk.onSet do
dbg_trace "cancelTokenSet"
prom.resolve ()
else
dbg_trace "tracerSuggestion: no cancel token (unexpected -- test setup wrong?)"
dbg_trace "tracerSuggestion ready"
resolveSyncPromise "tracerSuggestion"
return #[← `(tactic| wait_for_test_task "cancelTokenSet")]
end TestEmptyBy
set_option tactic.tryOnEmptyBy true
-- Skip the expensive built-in `try?` branches (`simp`/`grind`/`exact?`/…). The
-- test only cares about the user-registered `tracerSuggestion` running inside
-- the snapshot task; the library-search branches are pure overhead here.
set_option debug.tactic.try.onlyUserSuggestions true
example : True := by
trivial
--^ waitFor: blocked
--^ insert: "; skip"
--^ sync
example : TestEmptyBy.UnsolvableProp := by
theorem t1 : True := by
wait_for_sync "tracerSuggestion"
dbg_trace "sync received"
trace "blocked"
trivial

View file

@ -0,0 +1,5 @@
test: imports done
cancelTokenSet
tracerSuggestion ready
sync received
sync received

View file

@ -0,0 +1,81 @@
import Lean.Server.Test.Cancel
open Lean.Server.Test.Cancel
/-!
Test that cancellation propagates into parallel tactic combinators (`attempt_all_par`,
`first_par`).
Per section, chronological flow:
1. `theorem t` elaborates; its body runs `try? => <combinator> | block_until_cancelled "<L>"`,
which on the first invocation registers test task `"<L>"`, resolves sync promise `"<L>"`,
and enters a `Core.checkInterrupted` loop.
2. The gate theorem (`tGate`) elaborates; `wait_for_sync "<L>"` returns immediately (sync was
resolved in step 1), `trace "blocked"` emits the diagnostic the runner is waiting for.
3. Runner inserts `; skip`, triggers re-elab; `cancelRec` walks `t`'s snapshot tree, setting
the cancel token of the `block_until_cancelled` subtask. The loop's `Core.checkInterrupted`
throws, the `finally` resolves the test task, the second invocation's wait returns.
Section 1 uses sequential `first`; sections 2 and 3 use the parallel combinators
`attempt_all_par` and `first_par`, which spawn the inner tactic on a fresh `asTask` cancel
token. The fix in `CoreM.asTask` (#13428) propagates the parent token to those subtasks;
without it, `cancelRec` cannot reach the subtask's cancel token and the test times out.
-/
/-! ## Sequential `first` -/
example : True := by
trivial
--^ waitFor: blocked
--^ insert: "; skip"
--^ sync
theorem t : True := by
try? => first
| block_until_cancelled "first"
theorem tGate : True := by
wait_for_sync "first"
trace "blocked"
trivial
-- RESET
import Lean.Server.Test.Cancel
open Lean.Server.Test.Cancel
/-! ## Parallel `attempt_all_par` -/
example : True := by
trivial
--^ waitFor: blocked
--^ insert: "; skip"
--^ sync
theorem t : True := by
try? => attempt_all_par
| block_until_cancelled "attempt_all_par"
theorem tGate : True := by
wait_for_sync "attempt_all_par"
trace "blocked"
trivial
-- RESET
import Lean.Server.Test.Cancel
open Lean.Server.Test.Cancel
/-! ## Parallel `first_par` -/
example : True := by
trivial
--^ waitFor: blocked
--^ insert: "; skip"
--^ sync
theorem t : True := by
try? => first_par
| block_until_cancelled "first_par"
theorem tGate : True := by
wait_for_sync "first_par"
trace "blocked"
trivial

View file

@ -0,0 +1,3 @@
first: blocked
attempt_all_par: blocked
first_par: blocked