From 087f0b4a69ef2e4f30eeccabb43e836d56989685 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Sat, 22 Feb 2025 17:43:39 +0100 Subject: [PATCH] perf: optimize `sorry` detection in unused variables linter (#7129) This PR optimizes the performance of the unused variables linter in the case of a definition with a huge `Expr` representation --- src/Lean/Linter/UnusedVariables.lean | 14 ++++-- src/Lean/Server/InfoUtils.lean | 19 -------- src/kernel/expr.h | 18 +++++++ src/kernel/for_each_fn.cpp | 4 +- src/kernel/replace_fn.cpp | 17 ------- tests/bench/omega_stress.lean | 62 ++++++++++++++++++++++++ tests/bench/speedcenter.exec.velcom.yaml | 6 +++ 7 files changed, 99 insertions(+), 41 deletions(-) create mode 100644 tests/bench/omega_stress.lean diff --git a/src/Lean/Linter/UnusedVariables.lean b/src/Lean/Linter/UnusedVariables.lean index b733d8ec92..142c1d11f6 100644 --- a/src/Lean/Linter/UnusedVariables.lean +++ b/src/Lean/Linter/UnusedVariables.lean @@ -462,6 +462,12 @@ where else stx +private def hasSorry (stx : Syntax) : Bool := + stx.find? (fun + -- `@[unused_variables_ignore_fn]` can be used to extend this list + | `(sorry) | `(tactic| sorry) | `(tactic| admit) => true + | _ => false) |>.isSome + /-- Reports unused variable warnings on each command. Use `linter.unusedVariables` to disable. -/ def unusedVariables : Linter where run cmdStx := do @@ -475,11 +481,13 @@ def unusedVariables : Linter where let some cmdStxRange := cmdStx.getRange? | return - let infoTrees := (← get).infoState.trees.toArray - - if (← infoTrees.anyM (·.hasSorry)) then + -- We used to look for `sorry` on the `Expr` level, which is more robust, but just too expensive + -- on huge declarations (see e.g. `omega_stress` benchmark). + if hasSorry cmdStx then return + let infoTrees := (← get).infoState.trees.toArray + -- Run the main collection pass, resulting in `s : References`. let (_, s) ← (collectReferences infoTrees cmdStxRange).run {} diff --git a/src/Lean/Server/InfoUtils.lean b/src/Lean/Server/InfoUtils.lean index 071943cf13..636d576c22 100644 --- a/src/Lean/Server/InfoUtils.lean +++ b/src/Lean/Server/InfoUtils.lean @@ -444,23 +444,4 @@ partial def InfoTree.termGoalAt? (t : InfoTree) (hoverPos : String.Pos) : Option -- In the case `f a b`, where `f` is an identifier, the term goal at `f` should be the goal for the full application `f a b`. hoverableInfoAt? t hoverPos (includeStop := true) (omitAppFns := true) -partial def InfoTree.hasSorry : InfoTree → IO Bool := - go none -where go ci? - | .context ci t => go (ci.mergeIntoOuter? ci?) t - | .node i cs => - match ci?, i with - | some ci, .ofTermInfo ti - | some ci, .ofDelabTermInfo ti => do - -- NOTE: `instantiateMVars` can potentially be expensive but we rely on the elaborator - -- creating a fully instantiated `MutualDef.body` term info node which has the implicit effect - -- of making the `instantiateMVars` here a no-op and avoids further recursing into the body - let expr ← ti.runMetaM ci (instantiateMVars ti.expr) - return expr.hasSorry - -- we assume that `cs` are subterms of `ti.expr` and - -- thus do not have to be checked as well - | _, _ => - cs.anyM (go ci?) - | _ => return false - end Lean.Elab diff --git a/src/kernel/expr.h b/src/kernel/expr.h index c110ebd26c..38cef00a98 100644 --- a/src/kernel/expr.h +++ b/src/kernel/expr.h @@ -361,4 +361,22 @@ inline expr update_constant(expr const & e, levels const & new_levels) { return It also returns the meta-variable application found in \c e. */ optional has_expr_metavar_strict(expr const & e); inline bool is_constant(expr const & e, name const & n) { return is_const(e, n); } + +/* Like `is_exclusive`, but also consider unique MT references as unshared, which ensures we get + * similar performance on the cmdline and server (more precisely, for either option value of + * `internal.cmdlineSnapshots`). Note that as `e` is merely *borrowed* (e.g. from the mctx in + * the case of `instantiate_mvars` where the performance issue resolved here manifested, #5614), + * it is in fact possible that another thread could simultaneously add a new direct reference to + * `e`, so it is not definitely unshared in all cases if the below check is true. + * + * However, as we use this predicate merely as a conservative heuristic for detecting + * expressions that are unshared *within the expression tree* at hand, the approximation is + * still correct in this case. Furthermore, as we only use it for deciding when to cache + * results, it ultimately does not affect the correctness of the overall procedure in any case. + * This should however be kept in mind if we start using `is_likely_unshared` in other contexts. + */ +inline bool is_likely_unshared(expr const & e) { + return e.raw()->m_rc == 1 || e.raw()->m_rc == -1; +} + } diff --git a/src/kernel/for_each_fn.cpp b/src/kernel/for_each_fn.cpp index 101e64215b..ec33110344 100644 --- a/src/kernel/for_each_fn.cpp +++ b/src/kernel/for_each_fn.cpp @@ -23,7 +23,7 @@ template class for_each_fn { std::function m_f; // NOLINT bool visited(expr const & e) { - if (!is_shared(e)) return false; + if (is_likely_unshared(e)) return false; if (m_cache.find(e.raw()) != m_cache.end()) return true; m_cache.insert(e.raw()); return false; @@ -99,7 +99,7 @@ class for_each_offset_fn { std::function m_f; // NOLINT bool visited(expr const & e, unsigned offset) { - if (!is_shared(e)) return false; + if (is_likely_unshared(e)) return false; if (m_cache.find(std::make_pair(e.raw(), offset)) != m_cache.end()) return true; m_cache.insert(std::make_pair(e.raw(), offset)); return false; diff --git a/src/kernel/replace_fn.cpp b/src/kernel/replace_fn.cpp index f97ef05cc6..eb8b8cc540 100644 --- a/src/kernel/replace_fn.cpp +++ b/src/kernel/replace_fn.cpp @@ -12,23 +12,6 @@ Author: Leonardo de Moura namespace lean { -/* Like `is_exclusive`, but also consider unique MT references as unshared, which ensures we get - * similar performance on the cmdline and server (more precisely, for either option value of - * `internal.cmdlineSnapshots`). Note that as `e` is merely *borrowed* (e.g. from the mctx in - * the case of `instantiate_mvars` where the performance issue resolved here manifested, #5614), - * it is in fact possible that another thread could simultaneously add a new direct reference to - * `e`, so it is not definitely unshared in all cases if the below check is true. - * - * However, as we use this predicate merely as a conservative heuristic for detecting - * expressions that are unshared *within the expression tree* at hand, the approximation is - * still correct in this case. Furthermore, as we only use it for deciding when to cache - * results, it ultimately does not affect the correctness of the overall procedure in any case. - * This should however be kept in mind if we start using `is_likely_unshared` in other contexts. - */ -static bool is_likely_unshared(expr const & e) { - return e.raw()->m_rc == 1 || e.raw()->m_rc == -1; -} - class replace_rec_fn { struct key_hasher { std::size_t operator()(std::pair const & p) const { diff --git a/tests/bench/omega_stress.lean b/tests/bench/omega_stress.lean new file mode 100644 index 0000000000..7d8a0d2550 --- /dev/null +++ b/tests/bench/omega_stress.lean @@ -0,0 +1,62 @@ +/-! +This benchmark demonstrates creating a definition with many nested `omega` proofs, exercising +components that traverse terms before `abstractProofs` runs. + +From https://leanprover.zulipchat.com/#narrow/channel/287929-mathlib4/topic/slow.20syntax.20linters/near/500291283 +By Bhavik Mehta +Extracted originally to dsemonstrate an unused variables linter performance issue. +-/ + +def Hollom : Type := Nat × Nat × Nat + +def ofHollom : Hollom → Nat × Nat × Nat := id + +namespace Hollom + +inductive HollomOrder : Nat × Nat × Nat → Nat × Nat × Nat → Prop + | twice {x y n u v m : Nat} : m + 2 ≤ n → HollomOrder (x, y, n) (u, v, m) + | within {x y u v m : Nat} : x ≤ u → y ≤ v → HollomOrder (x, y, m) (u, v, m) + | next_min {x y u v m : Nat} : min x y + 1 ≤ min u v → HollomOrder (x, y, m + 1) (u, v, m) + | next_add {x y u v m : Nat} : x + y ≤ 2 * (u + v) → HollomOrder (x, y, m + 1) (u, v, m) + +set_option profiler true +set_option profiler.threshold 10 + +class Preorder (α : Type) extends LE α, LT α where + le_refl : ∀ a : α, a ≤ a + le_trans : ∀ a b c : α, a ≤ b → b ≤ c → a ≤ c + lt := fun a b => a ≤ b ∧ ¬b ≤ a + lt_iff_le_not_le : ∀ a b : α, a < b ↔ a ≤ b ∧ ¬b ≤ a := by intros; rfl + +class PartialOrder (α : Type) extends Preorder α where + le_antisymm : ∀ a b : α, a ≤ b → b ≤ a → a = b + +def test : PartialOrder Hollom where + le x y := HollomOrder (ofHollom x) (ofHollom y) + le_refl x := .within (Nat.le_refl _) (Nat.le_refl _) + le_trans + | _, _, _, .twice _, .twice _ => .twice (by omega) + | _, _, _, .twice _, .within _ _ => .twice (by omega) + | _, _, _, .twice _, .next_min _ => .twice (by omega) + | _, _, _, .twice _, .next_add _ => .twice (by omega) + | _, _, _, .within _ _, .twice _ => .twice (by omega) + | _, _, _, .within _ _, .within _ _ => .within (by omega) (by omega) + | _, _, _, .within _ _, .next_min _ => .next_min (by omega) + | _, _, _, .within _ _, .next_add _ => .next_add (by omega) + | _, _, _, .next_min _, .twice _ => .twice (by omega) + | _, _, _, .next_min _, .within _ _ => .next_min (by omega) + | _, _, _, .next_min _, .next_min _ => .twice (by omega) + | _, _, _, .next_min _, .next_add _ => .twice (by omega) + | _, _, _, .next_add _, .twice _ => .twice (by omega) + | _, _, _, .next_add _, .within _ _ => .next_add (by omega) + | _, _, _, .next_add _, .next_min _ => .twice (by omega) + | _, _, _, .next_add _, .next_add _ => .twice (by omega) + le_antisymm + | _, _, .twice _, .twice _ => by omega + | _, (_, _, _), .twice _, .within _ _ => by omega + | _, _, .twice _, .next_min _ => by omega + | _, _, .twice _, .next_add _ => by omega + | _, _, .within _ _, .twice _ => by omega + | _, _, .within _ _, .within _ _ => by congr 3 <;> omega + | _, _, .next_min _, .twice _ => by omega + | _, _, .next_add _, .twice _ => by omega diff --git a/tests/bench/speedcenter.exec.velcom.yaml b/tests/bench/speedcenter.exec.velcom.yaml index edb6dc0579..c2506fc8df 100644 --- a/tests/bench/speedcenter.exec.velcom.yaml +++ b/tests/bench/speedcenter.exec.velcom.yaml @@ -405,3 +405,9 @@ run_config: <<: *time cmd: lean big_omega.lean -Dinternal.cmdlineSnapshots=false +- attributes: + description: omega_stress.lean async + tags: [fast] + run_config: + <<: *time + cmd: lean omega_stress.lean -DElab.async=true