perf: optimize sorry detection in unused variables linter (#7129)

This PR optimizes the performance of the unused variables linter in the case of a definition with a huge `Expr` representation
2025-02-22 17:43:39 +01:00 · 2025-02-22 17:43:39 +01:00 · 087f0b4a69
commit 087f0b4a69
parent a7bdc55244
7 changed files with 99 additions and 41 deletions
--- a/src/Lean/Linter/UnusedVariables.lean
+++ b/src/Lean/Linter/UnusedVariables.lean
@ -462,6 +462,12 @@ where
    else
      stx

+private def hasSorry (stx : Syntax) : Bool :=
+  stx.find? (fun
+    -- `@[unused_variables_ignore_fn]` can be used to extend this list
+    | `(sorry) | `(tactic| sorry) | `(tactic| admit) => true
+    | _ => false) |>.isSome
+
 /-- Reports unused variable warnings on each command. Use `linter.unusedVariables` to disable. -/
 def unusedVariables : Linter where
  run cmdStx := do
@ -475,11 +481,13 @@ def unusedVariables : Linter where
    let some cmdStxRange := cmdStx.getRange?
      | return

-    let infoTrees := (← get).infoState.trees.toArray
-
-    if (← infoTrees.anyM (·.hasSorry)) then
+    -- We used to look for `sorry` on the `Expr` level, which is more robust, but just too expensive
+    -- on huge declarations (see e.g. `omega_stress` benchmark).
+    if hasSorry cmdStx then
      return

+    let infoTrees := (← get).infoState.trees.toArray
+
    -- Run the main collection pass, resulting in `s : References`.
    let (_, s) ← (collectReferences infoTrees cmdStxRange).run {}

--- a/src/Lean/Server/InfoUtils.lean
+++ b/src/Lean/Server/InfoUtils.lean
@ -444,23 +444,4 @@ partial def InfoTree.termGoalAt? (t : InfoTree) (hoverPos : String.Pos) : Option
  -- In the case `f a b`, where `f` is an identifier, the term goal at `f` should be the goal for the full application `f a b`.
  hoverableInfoAt? t hoverPos (includeStop := true) (omitAppFns := true)

-partial def InfoTree.hasSorry : InfoTree → IO Bool :=
-  go none
-where go ci?
-  | .context ci t => go (ci.mergeIntoOuter? ci?) t
-  | .node i cs =>
-    match ci?, i with
-    | some ci, .ofTermInfo ti
-    | some ci, .ofDelabTermInfo ti => do
-      -- NOTE: `instantiateMVars` can potentially be expensive but we rely on the elaborator
-      -- creating a fully instantiated `MutualDef.body` term info node which has the implicit effect
-      -- of making the `instantiateMVars` here a no-op and avoids further recursing into the body
-      let expr ← ti.runMetaM ci (instantiateMVars ti.expr)
-      return expr.hasSorry
-      -- we assume that `cs` are subterms of `ti.expr` and
-      -- thus do not have to be checked as well
-    | _, _ =>
-      cs.anyM (go ci?)
-  | _ => return false
-
 end Lean.Elab
--- a/src/kernel/expr.h
+++ b/src/kernel/expr.h
@ -361,4 +361,22 @@ inline expr update_constant(expr const & e, levels const & new_levels) { return
    It also returns the meta-variable application found in \c e. */
 optional<expr> has_expr_metavar_strict(expr const & e);
 inline bool is_constant(expr const & e, name const & n) { return is_const(e, n); }
+
+/* Like `is_exclusive`, but also consider unique MT references as unshared, which ensures we get
+ * similar performance on the cmdline and server (more precisely, for either option value of
+ * `internal.cmdlineSnapshots`). Note that as `e` is merely *borrowed* (e.g. from the mctx in
+ * the case of `instantiate_mvars` where the performance issue resolved here manifested, #5614),
+ * it is in fact possible that another thread could simultaneously add a new direct reference to
+ * `e`, so it is not definitely unshared in all cases if the below check is true.
+ *
+ * However, as we use this predicate merely as a conservative heuristic for detecting
+ * expressions that are unshared *within the expression tree* at hand, the approximation is
+ * still correct in this case. Furthermore, as we only use it for deciding when to cache
+ * results, it ultimately does not affect the correctness of the overall procedure in any case.
+ * This should however be kept in mind if we start using `is_likely_unshared` in other contexts.
+ */
+inline bool is_likely_unshared(expr const & e) {
+    return e.raw()->m_rc == 1 || e.raw()->m_rc == -1;
+}
+
 }
--- a/src/kernel/for_each_fn.cpp
+++ b/src/kernel/for_each_fn.cpp
@ -23,7 +23,7 @@ template<bool partial_apps> class for_each_fn {
    std::function<bool(expr const &)> m_f; // NOLINT

    bool visited(expr const & e) {
-        if (!is_shared(e)) return false;
+        if (is_likely_unshared(e)) return false;
        if (m_cache.find(e.raw()) != m_cache.end()) return true;
        m_cache.insert(e.raw());
        return false;
@ -99,7 +99,7 @@ class for_each_offset_fn {
    std::function<bool(expr const &, unsigned)> m_f; // NOLINT

    bool visited(expr const & e, unsigned offset) {
-        if (!is_shared(e)) return false;
+        if (is_likely_unshared(e)) return false;
        if (m_cache.find(std::make_pair(e.raw(), offset)) != m_cache.end()) return true;
        m_cache.insert(std::make_pair(e.raw(), offset));
        return false;
--- a/src/kernel/replace_fn.cpp
+++ b/src/kernel/replace_fn.cpp
@ -12,23 +12,6 @@ Author: Leonardo de Moura

 namespace lean {

-/* Like `is_exclusive`, but also consider unique MT references as unshared, which ensures we get
- * similar performance on the cmdline and server (more precisely, for either option value of
- * `internal.cmdlineSnapshots`). Note that as `e` is merely *borrowed* (e.g. from the mctx in
- * the case of `instantiate_mvars` where the performance issue resolved here manifested, #5614),
- * it is in fact possible that another thread could simultaneously add a new direct reference to
- * `e`, so it is not definitely unshared in all cases if the below check is true.
- *
- * However, as we use this predicate merely as a conservative heuristic for detecting
- * expressions that are unshared *within the expression tree* at hand, the approximation is
- * still correct in this case. Furthermore, as we only use it for deciding when to cache
- * results, it ultimately does not affect the correctness of the overall procedure in any case.
- * This should however be kept in mind if we start using `is_likely_unshared` in other contexts.
- */
-static bool is_likely_unshared(expr const & e) {
-    return e.raw()->m_rc == 1 || e.raw()->m_rc == -1;
-}
-
 class replace_rec_fn {
    struct key_hasher {
        std::size_t operator()(std::pair<lean_object *, unsigned> const & p) const {
--- a/tests/bench/omega_stress.lean
+++ b/tests/bench/omega_stress.lean
@ -0,0 +1,62 @@
+/-!
+This benchmark demonstrates creating a definition with many nested `omega` proofs, exercising
+components that traverse terms before `abstractProofs` runs.
+
+From https://leanprover.zulipchat.com/#narrow/channel/287929-mathlib4/topic/slow.20syntax.20linters/near/500291283
+By Bhavik Mehta
+Extracted originally to dsemonstrate an unused variables linter performance issue.
+-/
+
+def Hollom : Type := Nat × Nat × Nat
+
+def ofHollom : Hollom → Nat × Nat × Nat := id
+
+namespace Hollom
+
+inductive HollomOrder : Nat × Nat × Nat → Nat × Nat × Nat → Prop
+  | twice {x y n u v m : Nat} : m + 2 ≤ n → HollomOrder (x, y, n) (u, v, m)
+  | within {x y u v m : Nat} : x ≤ u → y ≤ v → HollomOrder (x, y, m) (u, v, m)
+  | next_min {x y u v m : Nat} : min x y + 1 ≤ min u v → HollomOrder (x, y, m + 1) (u, v, m)
+  | next_add {x y u v m : Nat} : x + y ≤ 2 * (u + v) → HollomOrder (x, y, m + 1) (u, v, m)
+
+set_option profiler true
+set_option profiler.threshold 10
+
+class Preorder (α : Type) extends LE α, LT α where
+  le_refl : ∀ a : α, a ≤ a
+  le_trans : ∀ a b c : α, a ≤ b → b ≤ c → a ≤ c
+  lt := fun a b => a ≤ b ∧ ¬b ≤ a
+  lt_iff_le_not_le : ∀ a b : α, a < b ↔ a ≤ b ∧ ¬b ≤ a := by intros; rfl
+
+class PartialOrder (α : Type) extends Preorder α where
+  le_antisymm : ∀ a b : α, a ≤ b → b ≤ a → a = b
+
+def test : PartialOrder Hollom where
+  le x y := HollomOrder (ofHollom x) (ofHollom y)
+  le_refl x := .within (Nat.le_refl _) (Nat.le_refl _)
+  le_trans
+  | _, _, _, .twice _, .twice _ => .twice (by omega)
+  | _, _, _, .twice _, .within _ _ => .twice (by omega)
+  | _, _, _, .twice _, .next_min _ => .twice (by omega)
+  | _, _, _, .twice _, .next_add _ => .twice (by omega)
+  | _, _, _, .within _ _, .twice _ => .twice (by omega)
+  | _, _, _, .within _ _, .within _ _ => .within (by omega) (by omega)
+  | _, _, _, .within _ _, .next_min _ => .next_min (by omega)
+  | _, _, _, .within _ _, .next_add _ => .next_add (by omega)
+  | _, _, _, .next_min _, .twice _ => .twice (by omega)
+  | _, _, _, .next_min _, .within _ _ => .next_min (by omega)
+  | _, _, _, .next_min _, .next_min _ => .twice (by omega)
+  | _, _, _, .next_min _, .next_add _ => .twice (by omega)
+  | _, _, _, .next_add _, .twice _ => .twice (by omega)
+  | _, _, _, .next_add _, .within _ _ => .next_add (by omega)
+  | _, _, _, .next_add _, .next_min _ => .twice (by omega)
+  | _, _, _, .next_add _, .next_add _ => .twice (by omega)
+  le_antisymm
+  | _, _, .twice _, .twice _ => by omega
+  | _, (_, _, _), .twice _, .within _ _ => by omega
+  | _, _, .twice _, .next_min _ => by omega
+  | _, _, .twice _, .next_add _ => by omega
+  | _, _, .within _ _, .twice _ => by omega
+  | _, _, .within _ _, .within _ _ => by congr 3 <;> omega
+  | _, _, .next_min _, .twice _ => by omega
+  | _, _, .next_add _, .twice _ => by omega
--- a/tests/bench/speedcenter.exec.velcom.yaml
+++ b/tests/bench/speedcenter.exec.velcom.yaml
@ -405,3 +405,9 @@
  run_config:
    <<: *time
    cmd: lean big_omega.lean -Dinternal.cmdlineSnapshots=false
+- attributes:
+    description: omega_stress.lean async
+    tags: [fast]
+  run_config:
+    <<: *time
+    cmd: lean omega_stress.lean -DElab.async=true