infoductor/Infoductor/Comonad/ExtractConsts.lean

/-
  ExtractConsts.lean — Proof term analysis toolkit.

  Given a proof term (or declaration name), extracts the "interesting" lemmas
  and constants it depends on. Useful for:
  - Understanding what a proof actually uses
  - Dependency analysis for proof refactoring
  - Feature extraction for ML-based proof search

  Ported from the Lean 3 `extract_consts.lean`, with the Mathematica
  dependency removed. This is pure Lean 4 metaprogramming.
-/

import Mathlib.Tactic

open Lean Meta Elab Command Term

namespace ExtractConsts

/-! ## Classifying constant names -/

/-- Check if a name refers to a recursor or eliminator. -/
def isRecursor (n : Name) : Bool :=
  match n with
  | .str _ "rec"                      => true
  | .str _ "recOn"                    => true
  | .str _ "rec_on"                   => true
  | .str _ "casesOn"                  => true
  | .str _ "cases_on"                 => true
  | .str _ "caseStrongInductionOn"    => true
  | .str _ "case_strong_induction_on" => true
  | .str _ "brecOn"                   => true
  | .str _ "below"                    => true
  | .str _ "ndrec"                    => true
  | .str _ "ndrecOn"                  => true
  | _ => false

/-- Constants that are structural plumbing, not mathematically interesting. -/
private def boringConsts : NameSet :=
  [
    `id, `id_locked, `congr, `congrArg, `congrFun,
    `propext, `funext, `Eq.mpr, `Eq.mp,
    `eq_self_iff_true, `forall_const,
    `nonempty_of_inhabited, `Classical.choice,
    `rfl, `Eq.refl, `Eq.symm, `Eq.trans,
    `True.intro, `False.elim,
    `absurd, `not_false_eq_true
  ].foldl (init := {}) fun s n => s.insert n

/-- Check if a constant is structurally boring (eq/or/and/iff/not helpers, etc). -/
def isBoring (n : Name) : Bool :=
  match n with
  | .str (.str _ "Eq") _         => true
  | .str (.str _ "Or") _         => true
  | .str (.str _ "And") _        => true
  | .str (.str _ "Decidable") _  => true
  | .str (.str _ "Iff") _        => true
  | .str (.str _ "Not") _        => true
  | .str (.str _ "HEq") _       => true
  -- Also match Lean 3-style lowercase (for mathlib compat)
  | .str (.str _ "eq") _         => true
  | .str (.str _ "or") _         => true
  | .str (.str _ "and") _        => true
  | .str (.str _ "decidable") _  => true
  | .str (.str _ "iff") _        => true
  | .str (.str _ "not") _        => true
  | _ => boringConsts.contains n

/-- A constant is "interesting" if it's not a recursor and not boring. -/
def isInteresting (n : Name) : Bool :=
  !(isRecursor n || isBoring n)

/-! ## Checking Prop-valued conclusions -/

/-- Check if `type` (a type expression) has a Prop-valued conclusion.
  Strips all leading `∀`-binders and checks if the body lives in `Prop`. -/
def isPropConclType (type : Expr) : MetaM Bool :=
  forallTelescopeReducing type fun _ body => do
    let sort ← inferType body
    return sort.isProp

/-- Check if the constant `n` has a Prop-valued conclusion. -/
def isPropConcl (n : Name) : MetaM Bool := do
  try
    let info ← getConstInfo n
    isPropConclType info.type
  catch _ => return false

/-! ## Extracting constants from expressions -/

/-- Collect all constant names in `e` whose types have Prop-valued conclusions.
  Uses `Expr.getUsedConstants` for efficient traversal. -/
def listPropConsts (e : Expr) : MetaM NameSet := do
  let allConsts := e.getUsedConstants
  let mut result : NameSet := {}
  for n in allConsts do
    if ← isPropConcl n then
      result := result.insert n
  return result

/-- Get the set of "interesting" Prop-valued constants used in `e`.
  Filters out recursors, boring structural lemmas, etc. -/
def getInterestingConsts (e : Expr) : MetaM NameSet := do
  let ns ← listPropConsts e
  let mut result : NameSet := {}
  for n in ns.toList do
    if isInteresting n then
      result := result.insert n
  return result

/-- Get `(name, type)` pairs for all interesting constants in `e`. -/
def getInterestingLemmasUsed (e : Expr) : MetaM (List (Name × Expr)) := do
  let ns ← getInterestingConsts e
  let mut result : List (Name × Expr) := []
  for n in ns.toList do
    try
      let info ← getConstInfo n
      result := (n, info.type) :: result
    catch _ => pure ()
  return result

/-! ## Walking expressions with binder opening -/

/-- Fold over all subexpressions of `e`, opening binders by introducing
  fresh local constants. This is the Lean 4 equivalent of the Lean 3
  `expr.mfold'` which instantiated bound variables.

  `f` is called on every subexpression with the current accumulator. -/
partial def foldExprOpen {α : Type} (e : Expr) (init : α)
    (f : Expr → α → MetaM α) : MetaM α := do
  let go := foldExprOpen
  match e with
  | .bvar _     => f e init
  | .sort _     => f e init
  | .fvar _     => f e init
  | .mvar id    =>
    let a ← f e init
    let type ← id.getType
    go type a f
  | .const ..   => f e init
  | .lit _      => f e init
  | .app e1 e2  =>
    let a ← f e init
    let a ← go e1 a f
    go e2 a f
  | .lam n t b bi =>
    let a ← f e init
    let a ← go t a f
    withLocalDecl n bi t fun x =>
      go (b.instantiate1 x) a f
  | .forallE n t b bi =>
    let a ← f e init
    let a ← go t a f
    withLocalDecl n bi t fun x =>
      go (b.instantiate1 x) a f
  | .letE n t v b _ =>
    let a ← f e init
    let a ← go t a f
    let a ← go v a f
    withLetDecl n t v fun x =>
      go (b.instantiate1 x) a f
  | .mdata _ b  => f e init >>= fun a => go b a f
  | .proj _ _ s => f e init >>= fun a => go s a f

/-- Collect all subexpressions of `e` that are proofs (i.e., have type in Prop). -/
def propSubterms (e : Expr) : MetaM (Array Expr) :=
  foldExprOpen e #[] fun sub acc => do
    try
      let type ← inferType sub
      let sort ← inferType type
      if sort.isProp then return acc.push sub
      else return acc
    catch _ => return acc

/-! ## Collecting interesting applications -/

/-- Check if `e` is an application whose head is a constant in `ics`. -/
def isAppOfInterestingConst (ics : NameSet) : Expr → Bool
  | .const n _ => ics.contains n
  | .app f _   => isAppOfInterestingConst ics f
  | _          => false

/-- Collect all applications in `e` whose head constant is in `ics`,
  whnf-reduced. Useful for seeing how interesting lemmas are instantiated. -/
partial def appsOfInterestingConsts (ics : NameSet) (e : Expr) :
    MetaM (Array Expr) := do
  let go := appsOfInterestingConsts ics
  match e with
  | .app f a =>
    if isAppOfInterestingConst ics f then
      let reduced ← whnf e
      let rest ← go a
      return #[reduced] ++ rest
    else
      let r1 ← go f
      let r2 ← go a
      return r1 ++ r2
  | .lam n t b bi =>
    let r1 ← go t
    withLocalDecl n bi t fun x => do
      let r2 ← go (b.instantiate1 x)
      return r1 ++ r2
  | .forallE n t b bi =>
    let r1 ← go t
    withLocalDecl n bi t fun x => do
      let r2 ← go (b.instantiate1 x)
      return r1 ++ r2
  | .letE n t v b _ =>
    let r1 ← go t
    let r2 ← go v
    withLetDecl n t v fun x => do
      let r3 ← go (b.instantiate1 x)
      return r1 ++ r2 ++ r3
  | .mvar id =>
    go (← id.getType)
  | _ => return #[]

/-! ## Pretty printing -/

/-- Pretty-print a list of `(name, type)` pairs, one per line. -/
def formatNameTypeList (entries : List (Name × Expr)) : MetaM String := do
  let parts ← entries.mapM fun (n, t) => do
    let tFmt ← ppExpr t
    return s!"{n} : {tFmt}"
  return "\n".intercalate parts

/-- Get a pretty-printed summary of interesting lemmas used in `e`. -/
def formatLemmasUsed (e : Expr) : MetaM String := do
  getInterestingLemmasUsed e >>= formatNameTypeList

/-! ## Analyzing declarations by name -/

/-- Get the proof term of a declaration (works for theorems and definitions). -/
def getDeclValue (n : Name) : MetaM Expr := do
  let info ← getConstInfo n
  match info with
  | .defnInfo v => return v.value
  | .thmInfo v  => return v.value
  | _ => throwError "'{n}' is not a definition or theorem"

/-- Analyze a declaration and return its interesting dependencies. -/
def analyzeDeclConsts (n : Name) : MetaM NameSet := do
  let v ← getDeclValue n
  getInterestingConsts v

/-- Analyze a declaration and return (name, type) pairs for its dependencies. -/
def analyzeDeclLemmas (n : Name) : MetaM (List (Name × Expr)) := do
  let v ← getDeclValue n
  getInterestingLemmasUsed v

/-- Pretty-print the interesting lemmas used by declaration `n`. -/
def analyzeDeclFormatted (n : Name) : MetaM String := do
  let v ← getDeclValue n
  formatLemmasUsed v

end ExtractConsts

/-! ## Interactive commands -/

open ExtractConsts in
/-- `#extract_consts declName` prints the interesting lemmas used in a proof. -/
elab "#extract_consts " n:ident : command => liftTermElabM do
  let name := n.getId
  -- Verify it exists
  let _ ← getConstInfo name
  let result ← analyzeDeclFormatted name
  if result.isEmpty then
    logInfo m!"No interesting constants found in '{name}'."
  else
    logInfo m!"Interesting lemmas used by '{name}':\n{result}"

open ExtractConsts in
/-- `#list_deps declName` lists all Prop-valued constants used in a proof. -/
elab "#list_deps " n:ident : command => liftTermElabM do
  let name := n.getId
  let v ← getDeclValue name
  let v ← getDeclValue name
  let ns ← listPropConsts v
  let names := ns.toList.map toString
  logInfo m!"Prop-valued constants in '{name}':\n{"\n".intercalate names}"

/-! ## Tests -/

section Tests

/-! ### Test theorems with varying levels of complexity -/

-- Trivial: no constants at all (just function application)
theorem test_trivial (p q : Prop) (h : p → q) (hp : p) : q := h hp

-- Only boring constants (And.left / And.right are filtered)
theorem test_boring (p q : Prop) (h : p ∧ q) : q ∧ p := ⟨h.2, h.1⟩

-- Uses Nat.succ_pos — a real lemma
theorem test_succ_pos : 0 < Nat.succ 5 := Nat.succ_pos 5

-- Uses Nat.add_comm — a real lemma
theorem test_add_comm (a b : Nat) : a + b = b + a := Nat.add_comm a b

-- Uses multiple interesting lemmas
theorem test_nat_arith (n : Nat) : n + 0 = 0 + n := by
  rw [Nat.add_zero, Nat.zero_add]

-- A proof that uses Or.elim (boring) but also classical reasoning
theorem test_em (p : Prop) : p ∨ ¬p := Classical.em p

-- Uses set extensionality — should surface Set.ext or similar
theorem test_set_inter_comm {α : Type*} (s t : Set α) : s ∩ t = t ∩ s := by
  ext x; constructor <;> intro h <;> exact ⟨h.2, h.1⟩

/-! ### Interactive command tests -/

-- Should report "no interesting constants" — proof is just `h hp`
#extract_consts test_trivial

-- Should report "no interesting constants" — only uses And.left/right (boring)
#extract_consts test_boring

-- Should find Nat.succ_pos
#extract_consts test_succ_pos

-- Should find Nat.add_comm
#extract_consts test_add_comm

-- Should find interesting lemmas from the rw
#extract_consts test_nat_arith

-- Should find Classical.em
#extract_consts test_em

-- Should find set-related lemmas
#extract_consts test_set_inter_comm

/-! ### Dependency listing tests -/

-- Lists ALL prop-valued constants (including boring ones)
#list_deps test_succ_pos
#list_deps test_add_comm

/-! ### Programmatic API tests -/

-- Test the classifier functions directly
#eval ExtractConsts.isRecursor `Nat.rec           -- true
#eval ExtractConsts.isRecursor `Nat.casesOn       -- true
#eval ExtractConsts.isRecursor `Nat.add_comm      -- false

#eval ExtractConsts.isBoring `And.intro            -- true
#eval ExtractConsts.isBoring `Eq.symm              -- true
#eval ExtractConsts.isBoring `Nat.add_comm         -- false

#eval ExtractConsts.isInteresting `Nat.add_comm    -- true
#eval ExtractConsts.isInteresting `And.intro        -- false
#eval ExtractConsts.isInteresting `Nat.rec          -- false

-- Test isPropConcl: Nat.add_comm has type ∀ n m, n + m = m + n (Prop conclusion)
#eval Lean.Meta.MetaM.run' do return ← ExtractConsts.isPropConcl `Nat.add_comm  -- true

-- Test isPropConcl: Nat.succ is Nat → Nat (not Prop)
#eval Lean.Meta.MetaM.run' do return ← ExtractConsts.isPropConcl `Nat.succ      -- false

-- Test getInterestingConsts programmatically
run_cmd liftTermElabM do
  let v ← ExtractConsts.getDeclValue ``test_succ_pos
  let ns ← ExtractConsts.getInterestingConsts v
  let names := ns.toList.map toString
  Lean.logInfo m!"Interesting consts in test_succ_pos: {names}"

run_cmd liftTermElabM do
  let v ← ExtractConsts.getDeclValue ``test_add_comm
  let ns ← ExtractConsts.getInterestingConsts v
  let names := ns.toList.map toString
  Lean.logInfo m!"Interesting consts in test_add_comm: {names}"

-- Test listPropConsts (unfiltered — includes boring ones)
run_cmd liftTermElabM do
  let v ← ExtractConsts.getDeclValue ``test_boring
  let ns ← ExtractConsts.listPropConsts v
  let names := ns.toList.map toString
  Lean.logInfo m!"All prop consts in test_boring: {names}"

-- Test getInterestingLemmasUsed with types
run_cmd liftTermElabM do
  let entries ← ExtractConsts.analyzeDeclLemmas ``test_nat_arith
  for (n, _) in entries do
    Lean.logInfo m!"  {n}"

end Tests