From 087f0b4a69ef2e4f30eeccabb43e836d56989685 Mon Sep 17 00:00:00 2001
From: Sebastian Ullrich <sebasti@nullri.ch>
Date: Sat, 22 Feb 2025 17:43:39 +0100
Subject: [PATCH] perf: optimize `sorry` detection in unused variables linter
 (#7129)

This PR optimizes the performance of the unused variables linter in the
case of a definition with a huge `Expr` representation
---
 src/Lean/Linter/UnusedVariables.lean     | 14 ++++--
 src/Lean/Server/InfoUtils.lean           | 19 --------
 src/kernel/expr.h                        | 18 +++++++
 src/kernel/for_each_fn.cpp               |  4 +-
 src/kernel/replace_fn.cpp                | 17 -------
 tests/bench/omega_stress.lean            | 62 ++++++++++++++++++++++++
 tests/bench/speedcenter.exec.velcom.yaml |  6 +++
 7 files changed, 99 insertions(+), 41 deletions(-)
 create mode 100644 tests/bench/omega_stress.lean
diff --git a/src/Lean/Linter/UnusedVariables.lean b/src/Lean/Linter/UnusedVariables.lean
index b733d8ec92..142c1d11f6 100644
--- a/src/Lean/Linter/UnusedVariables.lean
+++ b/src/Lean/Linter/UnusedVariables.lean
@@ -462,6 +462,12 @@ where
     else
       stx
 
+private def hasSorry (stx : Syntax) : Bool :=
+  stx.find? (fun
+    -- `@[unused_variables_ignore_fn]` can be used to extend this list
+    | `(sorry) | `(tactic| sorry) | `(tactic| admit) => true
+    | _ => false) |>.isSome
+
 /-- Reports unused variable warnings on each command. Use `linter.unusedVariables` to disable. -/
 def unusedVariables : Linter where
   run cmdStx := do
@@ -475,11 +481,13 @@ def unusedVariables : Linter where
     let some cmdStxRange := cmdStx.getRange?
       | return
 
-    let infoTrees := (← get).infoState.trees.toArray
-
-    if (← infoTrees.anyM (·.hasSorry)) then
+    -- We used to look for `sorry` on the `Expr` level, which is more robust, but just too expensive
+    -- on huge declarations (see e.g. `omega_stress` benchmark).
+    if hasSorry cmdStx then
       return
 
+    let infoTrees := (← get).infoState.trees.toArray
+
     -- Run the main collection pass, resulting in `s : References`.
     let (_, s) ← (collectReferences infoTrees cmdStxRange).run {}
 
diff --git a/src/Lean/Server/InfoUtils.lean b/src/Lean/Server/InfoUtils.lean
index 071943cf13..636d576c22 100644
--- a/src/Lean/Server/InfoUtils.lean
+++ b/src/Lean/Server/InfoUtils.lean
@@ -444,23 +444,4 @@ partial def InfoTree.termGoalAt? (t : InfoTree) (hoverPos : String.Pos) : Option
   -- In the case `f a b`, where `f` is an identifier, the term goal at `f` should be the goal for the full application `f a b`.
   hoverableInfoAt? t hoverPos (includeStop := true) (omitAppFns := true)
 
-partial def InfoTree.hasSorry : InfoTree → IO Bool :=
-  go none
-where go ci?
-  | .context ci t => go (ci.mergeIntoOuter? ci?) t
-  | .node i cs =>
-    match ci?, i with
-    | some ci, .ofTermInfo ti
-    | some ci, .ofDelabTermInfo ti => do
-      -- NOTE: `instantiateMVars` can potentially be expensive but we rely on the elaborator
-      -- creating a fully instantiated `MutualDef.body` term info node which has the implicit effect
-      -- of making the `instantiateMVars` here a no-op and avoids further recursing into the body
-      let expr ← ti.runMetaM ci (instantiateMVars ti.expr)
-      return expr.hasSorry
-      -- we assume that `cs` are subterms of `ti.expr` and
-      -- thus do not have to be checked as well
-    | _, _ =>
-      cs.anyM (go ci?)
-  | _ => return false
-
 end Lean.Elab
diff --git a/src/kernel/expr.h b/src/kernel/expr.h
index c110ebd26c..38cef00a98 100644
--- a/src/kernel/expr.h
+++ b/src/kernel/expr.h
@@ -361,4 +361,22 @@ inline expr update_constant(expr const & e, levels const & new_levels) { return
     It also returns the meta-variable application found in \c e. */
 optional<expr> has_expr_metavar_strict(expr const & e);
 inline bool is_constant(expr const & e, name const & n) { return is_const(e, n); }
+
+/* Like `is_exclusive`, but also consider unique MT references as unshared, which ensures we get
+ * similar performance on the cmdline and server (more precisely, for either option value of
+ * `internal.cmdlineSnapshots`). Note that as `e` is merely *borrowed* (e.g. from the mctx in
+ * the case of `instantiate_mvars` where the performance issue resolved here manifested, #5614),
+ * it is in fact possible that another thread could simultaneously add a new direct reference to
+ * `e`, so it is not definitely unshared in all cases if the below check is true.
+ *
+ * However, as we use this predicate merely as a conservative heuristic for detecting
+ * expressions that are unshared *within the expression tree* at hand, the approximation is
+ * still correct in this case. Furthermore, as we only use it for deciding when to cache
+ * results, it ultimately does not affect the correctness of the overall procedure in any case.
+ * This should however be kept in mind if we start using `is_likely_unshared` in other contexts.
+ */
+inline bool is_likely_unshared(expr const & e) {
+    return e.raw()->m_rc == 1 || e.raw()->m_rc == -1;
+}
+
 }
diff --git a/src/kernel/for_each_fn.cpp b/src/kernel/for_each_fn.cpp
index 101e64215b..ec33110344 100644
--- a/src/kernel/for_each_fn.cpp
+++ b/src/kernel/for_each_fn.cpp
@@ -23,7 +23,7 @@ template<bool partial_apps> class for_each_fn {
     std::function<bool(expr const &)> m_f; // NOLINT
 
     bool visited(expr const & e) {
-        if (!is_shared(e)) return false;
+        if (is_likely_unshared(e)) return false;
         if (m_cache.find(e.raw()) != m_cache.end()) return true;
         m_cache.insert(e.raw());
         return false;
@@ -99,7 +99,7 @@ class for_each_offset_fn {
     std::function<bool(expr const &, unsigned)> m_f; // NOLINT
 
     bool visited(expr const & e, unsigned offset) {
-        if (!is_shared(e)) return false;
+        if (is_likely_unshared(e)) return false;
         if (m_cache.find(std::make_pair(e.raw(), offset)) != m_cache.end()) return true;
         m_cache.insert(std::make_pair(e.raw(), offset));
         return false;
diff --git a/src/kernel/replace_fn.cpp b/src/kernel/replace_fn.cpp
index f97ef05cc6..eb8b8cc540 100644
--- a/src/kernel/replace_fn.cpp
+++ b/src/kernel/replace_fn.cpp
@@ -12,23 +12,6 @@ Author: Leonardo de Moura
 
 namespace lean {
 
-/* Like `is_exclusive`, but also consider unique MT references as unshared, which ensures we get
- * similar performance on the cmdline and server (more precisely, for either option value of
- * `internal.cmdlineSnapshots`). Note that as `e` is merely *borrowed* (e.g. from the mctx in
- * the case of `instantiate_mvars` where the performance issue resolved here manifested, #5614),
- * it is in fact possible that another thread could simultaneously add a new direct reference to
- * `e`, so it is not definitely unshared in all cases if the below check is true.
- *
- * However, as we use this predicate merely as a conservative heuristic for detecting
- * expressions that are unshared *within the expression tree* at hand, the approximation is
- * still correct in this case. Furthermore, as we only use it for deciding when to cache
- * results, it ultimately does not affect the correctness of the overall procedure in any case.
- * This should however be kept in mind if we start using `is_likely_unshared` in other contexts.
- */
-static bool is_likely_unshared(expr const & e) {
-    return e.raw()->m_rc == 1 || e.raw()->m_rc == -1;
-}
-
 class replace_rec_fn {
     struct key_hasher {
         std::size_t operator()(std::pair<lean_object *, unsigned> const & p) const {
diff --git a/tests/bench/omega_stress.lean b/tests/bench/omega_stress.lean
new file mode 100644
index 0000000000..7d8a0d2550
--- /dev/null
+++ b/tests/bench/omega_stress.lean
@@ -0,0 +1,62 @@
+/-!
+This benchmark demonstrates creating a definition with many nested `omega` proofs, exercising
+components that traverse terms before `abstractProofs` runs.
+
+From https://leanprover.zulipchat.com/#narrow/channel/287929-mathlib4/topic/slow.20syntax.20linters/near/500291283
+By Bhavik Mehta
+Extracted originally to dsemonstrate an unused variables linter performance issue.
+-/
+
+def Hollom : Type := Nat × Nat × Nat
+
+def ofHollom : Hollom → Nat × Nat × Nat := id
+
+namespace Hollom
+
+inductive HollomOrder : Nat × Nat × Nat → Nat × Nat × Nat → Prop
+  | twice {x y n u v m : Nat} : m + 2 ≤ n → HollomOrder (x, y, n) (u, v, m)
+  | within {x y u v m : Nat} : x ≤ u → y ≤ v → HollomOrder (x, y, m) (u, v, m)
+  | next_min {x y u v m : Nat} : min x y + 1 ≤ min u v → HollomOrder (x, y, m + 1) (u, v, m)
+  | next_add {x y u v m : Nat} : x + y ≤ 2 * (u + v) → HollomOrder (x, y, m + 1) (u, v, m)
+
+set_option profiler true
+set_option profiler.threshold 10
+
+class Preorder (α : Type) extends LE α, LT α where
+  le_refl : ∀ a : α, a ≤ a
+  le_trans : ∀ a b c : α, a ≤ b → b ≤ c → a ≤ c
+  lt := fun a b => a ≤ b ∧ ¬b ≤ a
+  lt_iff_le_not_le : ∀ a b : α, a < b ↔ a ≤ b ∧ ¬b ≤ a := by intros; rfl
+
+class PartialOrder (α : Type) extends Preorder α where
+  le_antisymm : ∀ a b : α, a ≤ b → b ≤ a → a = b
+
+def test : PartialOrder Hollom where
+  le x y := HollomOrder (ofHollom x) (ofHollom y)
+  le_refl x := .within (Nat.le_refl _) (Nat.le_refl _)
+  le_trans
+  | _, _, _, .twice _, .twice _ => .twice (by omega)
+  | _, _, _, .twice _, .within _ _ => .twice (by omega)
+  | _, _, _, .twice _, .next_min _ => .twice (by omega)
+  | _, _, _, .twice _, .next_add _ => .twice (by omega)
+  | _, _, _, .within _ _, .twice _ => .twice (by omega)
+  | _, _, _, .within _ _, .within _ _ => .within (by omega) (by omega)
+  | _, _, _, .within _ _, .next_min _ => .next_min (by omega)
+  | _, _, _, .within _ _, .next_add _ => .next_add (by omega)
+  | _, _, _, .next_min _, .twice _ => .twice (by omega)
+  | _, _, _, .next_min _, .within _ _ => .next_min (by omega)
+  | _, _, _, .next_min _, .next_min _ => .twice (by omega)
+  | _, _, _, .next_min _, .next_add _ => .twice (by omega)
+  | _, _, _, .next_add _, .twice _ => .twice (by omega)
+  | _, _, _, .next_add _, .within _ _ => .next_add (by omega)
+  | _, _, _, .next_add _, .next_min _ => .twice (by omega)
+  | _, _, _, .next_add _, .next_add _ => .twice (by omega)
+  le_antisymm
+  | _, _, .twice _, .twice _ => by omega
+  | _, (_, _, _), .twice _, .within _ _ => by omega
+  | _, _, .twice _, .next_min _ => by omega
+  | _, _, .twice _, .next_add _ => by omega
+  | _, _, .within _ _, .twice _ => by omega
+  | _, _, .within _ _, .within _ _ => by congr 3 <;> omega
+  | _, _, .next_min _, .twice _ => by omega
+  | _, _, .next_add _, .twice _ => by omega
diff --git a/tests/bench/speedcenter.exec.velcom.yaml b/tests/bench/speedcenter.exec.velcom.yaml
index edb6dc0579..c2506fc8df 100644
--- a/tests/bench/speedcenter.exec.velcom.yaml
+++ b/tests/bench/speedcenter.exec.velcom.yaml
@@ -405,3 +405,9 @@
   run_config:
     <<: *time
     cmd: lean big_omega.lean -Dinternal.cmdlineSnapshots=false
+- attributes:
+    description: omega_stress.lean async
+    tags: [fast]
+  run_config:
+    <<: *time
+    cmd: lean omega_stress.lean -DElab.async=true