From eb6c52e7e2c7c2830f85e5d0dafbbe1da50a9acd Mon Sep 17 00:00:00 2001
From: Leonardo de Moura <leomoura@amazon.com>
Date: Sat, 21 Dec 2024 23:32:25 +0100
Subject: [PATCH] feat: canonicalizer for the `grind` tactic (#6433)

This PR adds a custom type and instance canonicalizer for the (WIP)
`grind` tactic. The `grind` tactic uses congruence closure but
disregards types, type formers, instances, and proofs. Proofs are
ignored due to proof irrelevance. Types, type formers, and instances are
considered supporting elements and are not factored into congruence
detection. Instead, `grind` only checks whether elements are
structurally equal, which, in the context of the `grind` tactic, is
equivalent to pointer equality. See new tests for examples where the
canonicalizer is important.
---
 src/Lean/Meta/Tactic/Grind.lean       |   1 +
 src/Lean/Meta/Tactic/Grind/Canon.lean | 150 ++++++++++++++++++++++++++
 src/Lean/Meta/Tactic/Grind/Types.lean |  20 +---
 tests/lean/run/grindCanonInsts.lean   |  87 +++++++++++++++
 tests/lean/run/grindCanonTypes.lean   |  28 +++++
 5 files changed, 270 insertions(+), 16 deletions(-)
 create mode 100644 src/Lean/Meta/Tactic/Grind/Canon.lean
 create mode 100644 tests/lean/run/grindCanonInsts.lean
 create mode 100644 tests/lean/run/grindCanonTypes.lean

diff --git a/src/Lean/Meta/Tactic/Grind.lean b/src/Lean/Meta/Tactic/Grind.lean
index 3423737550..eb330d1428 100644
--- a/src/Lean/Meta/Tactic/Grind.lean
+++ b/src/Lean/Meta/Tactic/Grind.lean
@@ -12,6 +12,7 @@ import Lean.Meta.Tactic.Grind.Util
 import Lean.Meta.Tactic.Grind.Cases
 import Lean.Meta.Tactic.Grind.Injection
 import Lean.Meta.Tactic.Grind.Core
+import Lean.Meta.Tactic.Grind.Canon
 
 namespace Lean
 
diff --git a/src/Lean/Meta/Tactic/Grind/Canon.lean b/src/Lean/Meta/Tactic/Grind/Canon.lean
new file mode 100644
index 0000000000..89df6f4df4
--- /dev/null
+++ b/src/Lean/Meta/Tactic/Grind/Canon.lean
@@ -0,0 +1,150 @@
+/-
+Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Leonardo de Moura
+-/
+prelude
+import Lean.Meta.Basic
+import Lean.Meta.FunInfo
+import Lean.Util.FVarSubset
+import Lean.Util.PtrSet
+import Lean.Util.FVarSubset
+
+namespace Lean.Meta.Grind
+namespace Canon
+
+/-!
+A canonicalizer module for the `grind` tactic. The canonicalizer defined in `Meta/Canonicalizer.lean` is
+not suitable for the `grind` tactic. It was designed for tactics such as `omega`, where the goal is
+to detect when two structurally different atoms are definitionally equal.
+
+The `grind` tactic, on the other hand, uses congruence closure but disregards types, type formers, instances, and proofs.
+Proofs are ignored due to proof irrelevance. Types, type formers, and instances are considered supporting
+elements and are not factored into congruence detection. Instead, `grind` only checks whether
+elements are structurally equal, which, in the context of the `grind` tactic, is equivalent to pointer equality.
+
+This module minimizes the number of `isDefEq` checks by comparing two terms `a` and `b` only if they instances,
+types, or type formers and are the `i`-th arguments of two different `f`-applications. This approach is
+sufficient for the congruence closure procedure used by the `grind` tactic.
+
+To further optimize `isDefEq` checks, instances are compared using `TransparencyMode.instances`, which reduces
+the number of constants that need to be unfolded. If diagnostics are enabled, instances are compared using
+the default transparency mode too for sanity checking, and discrepancies are reported.
+Types and type formers are always checked using default transparency.
+-/
+
+structure State where
+  argMap : PHashMap (Expr × Nat) (List Expr) := {}
+  canon  : PHashMap Expr Expr := {}
+  deriving Inhabited
+
+/--
+Helper function for canonicalizing `e` occurring as the `i`th argument of an `f`-application.
+`isInst` is true if `e` is an type class instance.
+
+Recall that we use `TransparencyMode.instances` for checking whether two instances are definitionally equal or not.
+Thus, if diagnostics are enabled, we also check them using `TransparencyMode.default`. If the result is different
+we report to the user.
+-/
+def canonElemCore (f : Expr) (i : Nat) (e : Expr) (isInst : Bool) : StateT State MetaM Expr := do
+  let s ← get
+  if let some c := s.canon.find? e then
+    return c
+  let key := (f, i)
+  let cs := s.argMap.find? key |>.getD []
+  for c in cs do
+    if (← isDefEq e c) then
+      if c.fvarsSubset e then
+        -- It is not in general safe to replace `e` with `c` if `c` has more free variables than `e`.
+        modify fun s => { s with canon := s.canon.insert e c }
+        return c
+    if isInst then
+      if (← isDiagnosticsEnabled <&&> pure (c.fvarsSubset e) <&&> (withDefault <| isDefEq e c)) then
+        -- TODO: consider storing this information in some structure that can be browsed later.
+        trace[grind.issues] "the following `grind` static elements are definitionally equal with `default` transparency, but not with `instances` transparency{indentExpr e}\nand{indentExpr c}"
+  modify fun s => { s with canon := s.canon.insert e e, argMap := s.argMap.insert key (e::cs) }
+  return e
+
+abbrev canonType (f : Expr) (i : Nat) (e : Expr) := withDefault <| canonElemCore f i e false
+abbrev canonInst (f : Expr) (i : Nat) (e : Expr) := withReducibleAndInstances <| canonElemCore f i e true
+
+/--
+Return type for the `shouldCanon` function.
+-/
+private inductive ShouldCanonResult where
+  | /-
+    Nested proofs are ignored by the canonizer.
+    That is, they are not canonized or recursively visited.
+    -/
+    ignore
+  | /- Nested types (and type formers) are canonicalized. -/
+    canonType
+  | /- Nested instances are canonicalized. -/
+    canonInst
+  | /-
+    Term is not a proof, type (former), nor an instance.
+    Thus, it must be recursively visited by the canonizer.
+    -/
+    visit
+  deriving Inhabited
+
+/--
+See comments at `ShouldCanonResult`.
+-/
+def shouldCanon (pinfos : Array ParamInfo) (i : Nat) (arg : Expr) : MetaM ShouldCanonResult := do
+  if h : i < pinfos.size then
+    let pinfo := pinfos[i]
+    if pinfo.isInstImplicit then
+      return .canonInst
+    else if pinfo.isProp then
+      return .ignore
+  if (← isTypeFormer arg) then
+    return .canonType
+  else
+    return .visit
+
+unsafe def canonImpl (e : Expr) : StateT State MetaM Expr := do
+  visit e |>.run' mkPtrMap
+where
+  visit (e : Expr) : StateRefT (PtrMap Expr Expr) (StateT State MetaM) Expr := do
+    match e with
+    | .bvar .. => unreachable!
+    -- Recall that `grind` treats `let`, `forall`, and `lambda` as atomic terms.
+    | .letE .. | .forallE .. | .lam ..
+    | .const .. | .lit .. | .mvar .. | .sort .. | .fvar ..
+    -- Recall that the `grind` preprocessor uses the `foldProjs` preprocessing step.
+    | .proj ..
+    -- Recall that the `grind` preprocessor uses the `eraseIrrelevantMData` preprocessing step.
+    | .mdata ..  => return e
+    -- We only visit applications
+    | .app .. =>
+      -- Check whether it is cached
+      if let some r := (← get).find? e then
+        return r
+      e.withApp fun f args => do
+        let pinfos := (← getFunInfo f).paramInfo
+        let mut modified := false
+        let mut args := args
+        for i in [:args.size] do
+          let arg := args[i]!
+          let arg' ← match (← shouldCanon pinfos i arg) with
+          | .ignore    => pure arg
+          | .canonType => canonType f i arg
+          | .canonInst => canonInst f i arg
+          | .visit     => visit arg
+          unless ptrEq arg arg' do
+            args := args.set! i arg'
+            modified := true
+        let e' := if modified then mkAppN f args else e
+        modify fun s => s.insert e e'
+        return e'
+
+/--
+Canonicalizes nested types, type formers, and instances in `e`.
+-/
+def canon (e : Expr) : StateT State MetaM Expr :=
+  unsafe canonImpl e
+
+end Canon
+
+end Lean.Meta.Grind
diff --git a/src/Lean/Meta/Tactic/Grind/Types.lean b/src/Lean/Meta/Tactic/Grind/Types.lean
index 441b4426d4..640a290171 100644
--- a/src/Lean/Meta/Tactic/Grind/Types.lean
+++ b/src/Lean/Meta/Tactic/Grind/Types.lean
@@ -8,7 +8,7 @@ import Lean.Util.ShareCommon
 import Lean.Meta.Basic
 import Lean.Meta.CongrTheorems
 import Lean.Meta.AbstractNestedProofs
-import Lean.Meta.Canonicalizer
+import Lean.Meta.Tactic.Grind.Canon
 import Lean.Meta.Tactic.Util
 
 namespace Lean.Meta.Grind
@@ -37,7 +37,7 @@ instance : Hashable CongrTheoremCacheKey where
   hash a := mixHash (unsafe ptrAddrUnsafe a.f).toUInt64 (hash a.numArgs)
 
 structure State where
-  canon      : Canonicalizer.State := {}
+  canon      : Canon.State := {}
   /-- `ShareCommon` (aka `Hashconsing`) state. -/
   scState    : ShareCommon.State.{0} ShareCommon.objectFactory := ShareCommon.State.mk _
   /-- Next index for creating auxiliary theorems. -/
@@ -87,25 +87,13 @@ def shareCommon (e : Expr) : GrindM Expr := do
     (e, { canon, scState, nextThmIdx, congrThms, trueExpr, falseExpr })
 
 /--
-Applies the canonicalizer to all subterms of `e`.
+Canonicalizes nested types, type formers, and instances in `e`.
 -/
--- TODO: the current canonicalizer is not a good solution for `grind`.
--- The problem is that two different applications `@f inst_1 a` and `@f inst_2 b`
--- may still have syntaticaally different instances. Thus, if we learn that `a = b`,
--- congruence closure will fail to see that the two applications are congruent.
 def canon (e : Expr) : GrindM Expr := do
   let canonS ← modifyGet fun s => (s.canon, { s with canon := {} })
-  let (e, canonS) ← Canonicalizer.CanonM.run (canonRec e) (s := canonS)
+  let (e, canonS) ← Canon.canon e |>.run canonS
   modify fun s => { s with canon := canonS }
   return e
-where
-  canonRec (e : Expr) : CanonM Expr := do
-    let post (e : Expr) : CanonM TransformStep := do
-      if e.isApp then
-        return .done (← Meta.canon e)
-      else
-        return .done e
-    transform e post
 
 /--
 Creates a congruence theorem for a `f`-applications with `numArgs` arguments.
diff --git a/tests/lean/run/grindCanonInsts.lean b/tests/lean/run/grindCanonInsts.lean
new file mode 100644
index 0000000000..59b5b481f4
--- /dev/null
+++ b/tests/lean/run/grindCanonInsts.lean
@@ -0,0 +1,87 @@
+import Lean
+
+open Lean Meta Elab Tactic Grind in
+elab "grind_test" : tactic => withMainContext do
+  let declName := (← Term.getDeclName?).getD `_main
+  Meta.Grind.preprocessAndProbe (← getMainGoal) declName do
+    let nodes ← filterENodes fun e => return e.self.isAppOf ``HMul.hMul
+    logInfo (nodes.toList.map (·.self))
+
+class Semigroup (α : Type u) extends Mul α where
+  mul_assoc (a b c : α) : a * b * c = a * (b * c)
+
+export Semigroup (mul_assoc)
+
+class MulComm (α : Type u)  extends Mul α where
+  mul_comm (a b : α) : a * b = b * a
+
+export MulComm (mul_comm)
+
+class CommSemigroup (α : Type u) extends Semigroup α where
+  mul_comm (a b : α) : a * b = b * a
+
+instance [CommSemigroup α] : MulComm α where
+  mul_comm := CommSemigroup.mul_comm
+
+class One (α : Type u) where
+  one : α
+
+instance [One α] : OfNat α (nat_lit 1) where
+  ofNat := One.one
+
+class Monoid (α : Type u) extends Semigroup α, One α where
+  one_mul (a : α) : 1 * a = a
+  mul_one (a : α) : a * 1 = a
+
+export Monoid (one_mul mul_one)
+
+class CommMonoid (α : Type u) extends Monoid α where
+  mul_comm (a b : α) : a * b = b * a
+
+instance [CommMonoid α] : CommSemigroup α where
+  mul_comm := CommMonoid.mul_comm
+
+instance [CommMonoid α] : MulComm α where
+  mul_comm := CommSemigroup.mul_comm
+
+instance : CommMonoid Nat where
+  mul := Nat.mul
+  one := 1
+  mul_assoc := Nat.mul_assoc
+  mul_comm  := Nat.mul_comm
+  one_mul   := Nat.one_mul
+  mul_one   := Nat.mul_one
+
+theorem left_comm [CommMonoid α] (a b c : α) : a * (b * c) = b * (a * c) := by
+  rw [← Semigroup.mul_assoc, CommMonoid.mul_comm a b, Semigroup.mul_assoc]
+
+/--
+info: [b * c, a * (b * c), d * (b * c)]
+---
+warning: declaration uses 'sorry'
+-/
+#guard_msgs in
+example (a b c d : Nat) : b * (a * c) = d * (b * c) → False := by
+  rw [left_comm] -- Introduces a new (non-canonical) instance for `Mul Nat`
+  grind_test -- State should have only 3 `*`-applications
+  sorry
+
+
+set_option pp.notation false in
+set_option pp.explicit true in
+/--
+info: [@HMul.hMul Nat Nat Nat (@instHMul Nat instMulNat) b a, @HMul.hMul Nat Nat Nat (@instHMul Nat instMulNat) b d]
+---
+warning: declaration uses 'sorry'
+-/
+#guard_msgs in
+example (a b c d : Nat) : b * a = d * b → False := by
+  rw [CommMonoid.mul_comm d b] -- Introduces a new (non-canonical) instance for `Mul Nat`
+  -- See target here
+  guard_target =ₛ
+    @HMul.hMul Nat Nat Nat (@instHMul Nat instMulNat) b a
+    =
+    @HMul.hMul Nat Nat Nat (@instHMul Nat (@Semigroup.toMul Nat (@Monoid.toSemigroup Nat (@CommMonoid.toMonoid Nat instCommMonoidNat)))) b d
+    → False
+  grind_test -- State should have only 2 `*`-applications, and they use the same instance
+  sorry
diff --git a/tests/lean/run/grindCanonTypes.lean b/tests/lean/run/grindCanonTypes.lean
new file mode 100644
index 0000000000..a7f52acb7f
--- /dev/null
+++ b/tests/lean/run/grindCanonTypes.lean
@@ -0,0 +1,28 @@
+import Lean
+
+
+def g (s : Type) := s
+def f (a : α) := a
+
+open Lean Meta Elab Tactic Grind in
+elab "grind_test" : tactic => withMainContext do
+  let declName := (← Term.getDeclName?).getD `_main
+  Meta.Grind.preprocessAndProbe (← getMainGoal) declName do
+    let nodes ← filterENodes fun e => return e.self.isAppOf ``f
+    logInfo (nodes.toList.map (·.self))
+
+
+set_option pp.explicit true
+/--
+info: [@f Nat a, @f Nat b]
+---
+warning: declaration uses 'sorry'
+-/
+#guard_msgs in
+example (a b c d : Nat) : @f Nat a = b → @f (g Nat) a = c → @f (g Nat) b = d → a = b → False := by
+  -- State should have only two `f`-applications: `@f Nat a`, `@f Nat b`
+  -- Note that `@f (g Nat) b` has been canonicalized to `@f Nat b`.
+  -- Thus, if `a` and `b` equivalence classes are merged, `grind` can still detect that
+  -- `@f Nat a` and `@f Nat b` are equal too.
+  grind_test
+  sorry