feat: basic premise selection algorithm based on MePo (#7844)
This PR adds a simple implementation of MePo, from "Lightweight relevance filtering for machine-generated resolution problems" by Meng and Paulson. This needs tuning, but is already useful as a baseline or test case. --------- Co-authored-by: Thomas Zhu <thomas.zhu.sh@hotmail.com>
This commit is contained in:
parent
27fa5b0bb5
commit
0807f73171
5 changed files with 302 additions and 133 deletions
|
|
@ -8,5 +8,3 @@ module
|
|||
prelude
|
||||
public import Lean.Data.NameMap.Basic
|
||||
public import Lean.Data.NameMap.AdditionalOperations
|
||||
|
||||
public section
|
||||
|
|
|
|||
|
|
@ -6,134 +6,5 @@ Authors: Kim Morrison
|
|||
module
|
||||
|
||||
prelude
|
||||
public import Lean.Elab.Command
|
||||
public import Lean.Meta.Eval
|
||||
public import Lean.Meta.CompletionName
|
||||
public import Init.Data.Random
|
||||
|
||||
public section
|
||||
|
||||
/-!
|
||||
# An API for premise selection algorithms.
|
||||
|
||||
This module provides a basic API for premise selection algorithms,
|
||||
which are used to suggest identifiers that should be introduced in a proof.
|
||||
|
||||
The core interface is the `Selector` type, which is a function from a metavariable
|
||||
and a configuration to a list of suggestions.
|
||||
|
||||
The `Selector` is registered as an environment extension, and the trivial (no suggestions) implementation
|
||||
is `Lean.PremiseSelection.empty`.
|
||||
|
||||
Lean does not provide a default premise selector, so this module is intended to be used in conjunction
|
||||
with a downstream package which registers a premise selector.
|
||||
-/
|
||||
|
||||
namespace Lean.PremiseSelection
|
||||
|
||||
/--
|
||||
A `Suggestion` is essentially just an identifier and a confidence score that the identifier is relevant.
|
||||
If the premise selection request included information about the intended use (e.g. in the simplifier, in `grind`, etc.)
|
||||
the score may be adjusted for that application.
|
||||
|
||||
-/
|
||||
structure Suggestion where
|
||||
name : Name
|
||||
/--
|
||||
The score of the suggestion, as a probability that this suggestion should be used.
|
||||
-/
|
||||
score : Float
|
||||
|
||||
structure Config where
|
||||
/--
|
||||
The maximum number of suggestions to return.
|
||||
-/
|
||||
maxSuggestions : Option Nat := none
|
||||
/--
|
||||
The tactic that is calling the premise selection, e.g. `simp`, `grind`, or `aesop`.
|
||||
This may be used to adjust the score of the suggestions
|
||||
-/
|
||||
caller : Option Name := none
|
||||
/--
|
||||
A filter on suggestions; only suggestions returning `true` should be returned.
|
||||
(It can be better to filter on the premise selection side, to ensure that enough suggestions are returned.)
|
||||
-/
|
||||
filter : Name → MetaM Bool := fun _ => pure true
|
||||
/--
|
||||
An optional arbitrary "hint" to the premise selection algorithm.
|
||||
There is no guarantee that the algorithm will make any use of the hint.
|
||||
|
||||
Potential use cases include a natural language comment provided by the user
|
||||
(e.g. allowing use of the premise selector as a search engine)
|
||||
or including context from the current proof and/or file.
|
||||
|
||||
We may later split these use cases into separate fields if necessary.
|
||||
-/
|
||||
hint : Option String := none
|
||||
|
||||
abbrev Selector : Type := MVarId → Config → MetaM (Array Suggestion)
|
||||
|
||||
/--
|
||||
The trivial premise selector, which returns no suggestions.
|
||||
-/
|
||||
def empty : Selector := fun _ _ => pure #[]
|
||||
|
||||
/-- A random premise selection algorithm, provided solely for testing purposes. -/
|
||||
def random (gen : StdGen := ⟨37, 59⟩) : Selector := fun _ cfg => do
|
||||
IO.stdGenRef.set gen
|
||||
let env ← getEnv
|
||||
let max := cfg.maxSuggestions.getD 10
|
||||
let consts := env.const2ModIdx.keysArray
|
||||
let mut suggestions := #[]
|
||||
while suggestions.size < max do
|
||||
let i ← IO.rand 0 consts.size
|
||||
let name := consts[i]!
|
||||
if ! (`Lean).isPrefixOf name && Lean.Meta.allowCompletion env name then
|
||||
suggestions := suggestions.push { name := name, score := 1.0 / consts.size.toFloat }
|
||||
return suggestions
|
||||
|
||||
initialize premiseSelectorExt : EnvExtension (Option Selector) ←
|
||||
registerEnvExtension (pure none)
|
||||
|
||||
/-- Generate premise suggestions for the given metavariable, using the currently registered premise selector. -/
|
||||
def select (m : MVarId) (c : Config := {}) : MetaM (Array Suggestion) := do
|
||||
let some selector := premiseSelectorExt.getState (← getEnv) |
|
||||
throwError "No premise selector registered. \
|
||||
(Note the Lean does not provide a default premise selector, these must be installed by a downstream library.)"
|
||||
selector m c
|
||||
|
||||
/-!
|
||||
Currently the registration mechanism is just global state.
|
||||
This means that if multiple modules register premise selectors,
|
||||
the behaviour will be dependent on the order of loading modules.
|
||||
|
||||
We should replace this with a mechanism so that
|
||||
premise selectors are configured via options in the `lakefile`, and
|
||||
commands are only used to override in a single declaration or file.
|
||||
-/
|
||||
|
||||
/-- Set the current premise selector.-/
|
||||
def registerPremiseSelector (selector : Selector) : CoreM Unit := do
|
||||
modifyEnv fun env => premiseSelectorExt.setState env (some selector)
|
||||
|
||||
open Lean Elab Command in
|
||||
@[builtin_command_elab setPremiseSelectorCmd, inherit_doc setPremiseSelectorCmd]
|
||||
def elabSetPremiseSelector : CommandElab
|
||||
| `(command| set_premise_selector $selector) => do
|
||||
let selector ← liftTermElabM do
|
||||
try
|
||||
let selectorTerm ← Term.elabTermEnsuringType selector (some (Expr.const ``Selector []))
|
||||
unsafe Meta.evalExpr Selector (Expr.const ``Selector []) selectorTerm
|
||||
catch _ =>
|
||||
throwError "Failed to elaborate {selector} as a `MVarId → Config → MetaM (Array Suggestion)`."
|
||||
liftCoreM (registerPremiseSelector selector)
|
||||
| _ => throwUnsupportedSyntax
|
||||
|
||||
open Lean.Elab.Tactic in
|
||||
@[builtin_tactic Lean.Parser.Tactic.suggestPremises] def evalSuggestPremises : Tactic := fun _ =>
|
||||
liftMetaTactic1 fun mvarId => do
|
||||
let suggestions ← select mvarId
|
||||
logInfo m!"Premise suggestions: {suggestions.map (·.name)}"
|
||||
return mvarId
|
||||
|
||||
end Lean.PremiseSelection
|
||||
import Lean.PremiseSelection.Basic
|
||||
import Lean.PremiseSelection.MePo
|
||||
|
|
|
|||
180
src/Lean/PremiseSelection/Basic.lean
Normal file
180
src/Lean/PremiseSelection/Basic.lean
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
/-
|
||||
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Kim Morrison
|
||||
-/
|
||||
module
|
||||
|
||||
prelude
|
||||
public import Lean.Elab.Command
|
||||
public import Lean.Meta.Eval
|
||||
public import Lean.Meta.CompletionName
|
||||
public import Init.Data.Random
|
||||
|
||||
/-!
|
||||
# An API for premise selection algorithms.
|
||||
|
||||
This module provides a basic API for premise selection algorithms,
|
||||
which are used to suggest identifiers that should be introduced in a proof.
|
||||
|
||||
The core interface is the `Selector` type, which is a function from a metavariable
|
||||
and a configuration to a list of suggestions.
|
||||
|
||||
The `Selector` is registered as an environment extension, and the trivial (no suggestions) implementation
|
||||
is `Lean.PremiseSelection.empty`.
|
||||
|
||||
Lean does not provide a default premise selector, so this module is intended to be used in conjunction
|
||||
with a downstream package which registers a premise selector.
|
||||
-/
|
||||
|
||||
@[expose] public section
|
||||
|
||||
namespace Lean.PremiseSelection
|
||||
|
||||
/--
|
||||
A `Suggestion` is essentially just an identifier and a confidence score that the identifier is relevant.
|
||||
If the premise selection request included information about the intended use (e.g. in the simplifier, in `grind`, etc.)
|
||||
the score may be adjusted for that application.
|
||||
|
||||
-/
|
||||
structure Suggestion where
|
||||
name : Name
|
||||
/--
|
||||
The score of the suggestion, as a probability that this suggestion should be used.
|
||||
-/
|
||||
score : Float
|
||||
|
||||
structure Config where
|
||||
/--
|
||||
The maximum number of suggestions to return.
|
||||
-/
|
||||
maxSuggestions : Option Nat := none
|
||||
/--
|
||||
The tactic that is calling the premise selection, e.g. `simp`, `grind`, or `aesop`.
|
||||
This may be used to adjust the score of the suggestions
|
||||
-/
|
||||
caller : Option Name := none
|
||||
/--
|
||||
A filter on suggestions; only suggestions returning `true` should be returned.
|
||||
(It can be better to filter on the premise selection side, to ensure that enough suggestions are returned.)
|
||||
-/
|
||||
filter : Name → MetaM Bool := fun _ => pure true
|
||||
/--
|
||||
An optional arbitrary "hint" to the premise selection algorithm.
|
||||
There is no guarantee that the algorithm will make any use of the hint.
|
||||
|
||||
Potential use cases include a natural language comment provided by the user
|
||||
(e.g. allowing use of the premise selector as a search engine)
|
||||
or including context from the current proof and/or file.
|
||||
|
||||
We may later split these use cases into separate fields if necessary.
|
||||
-/
|
||||
hint : Option String := none
|
||||
|
||||
abbrev Selector : Type := MVarId → Config → MetaM (Array Suggestion)
|
||||
|
||||
section DenyList
|
||||
|
||||
/-- Premises from a module whose name has one of the following components are not retrieved. -/
|
||||
builtin_initialize moduleDenyListExt : SimplePersistentEnvExtension String (List String) ←
|
||||
registerSimplePersistentEnvExtension {
|
||||
addEntryFn := (·.cons)
|
||||
addImportedFn := mkStateFromImportedEntries (·.cons) []
|
||||
}
|
||||
|
||||
/-- A premise whose name has one of the following components is not retrieved. -/
|
||||
builtin_initialize nameDenyListExt : SimplePersistentEnvExtension String (List String) ←
|
||||
registerSimplePersistentEnvExtension {
|
||||
addEntryFn := (·.cons)
|
||||
addImportedFn := mkStateFromImportedEntries (·.cons) []
|
||||
}
|
||||
|
||||
/-- A premise whose `type.getForallBody.getAppFn` is a constant that has one of these prefixes is not retrieved. -/
|
||||
builtin_initialize typePrefixDenyListExt : SimplePersistentEnvExtension Name (List Name) ←
|
||||
registerSimplePersistentEnvExtension {
|
||||
addEntryFn := (·.cons)
|
||||
addImportedFn := mkStateFromImportedEntries (·.cons) []
|
||||
}
|
||||
|
||||
def isDeniedModule (env : Environment) (moduleName : Name) : Bool :=
|
||||
(moduleDenyListExt.getState env).any fun p => moduleName.anyS (· == p)
|
||||
|
||||
def isDeniedPremise (env : Environment) (name : Name) : Bool := Id.run do
|
||||
if name == ``sorryAx then return true
|
||||
if name.isInternalDetail then return true
|
||||
if (nameDenyListExt.getState env).any (fun p => name.anyS (· == p)) then return true
|
||||
if let some moduleIdx := env.getModuleIdxFor? name then
|
||||
let moduleName := env.header.moduleNames[moduleIdx.toNat]!
|
||||
if isDeniedModule env moduleName then
|
||||
return true
|
||||
let some ci := env.find? name | return true
|
||||
if let .const fnName _ := ci.type.getForallBody.getAppFn then
|
||||
if (typePrefixDenyListExt.getState env).any (fun p => p.isPrefixOf fnName) then return true
|
||||
return false
|
||||
|
||||
end DenyList
|
||||
|
||||
/--
|
||||
The trivial premise selector, which returns no suggestions.
|
||||
-/
|
||||
def empty : Selector := fun _ _ => pure #[]
|
||||
|
||||
/-- A random premise selection algorithm, provided solely for testing purposes. -/
|
||||
def random (gen : StdGen := ⟨37, 59⟩) : Selector := fun _ cfg => do
|
||||
IO.stdGenRef.set gen
|
||||
let env ← getEnv
|
||||
let max := cfg.maxSuggestions.getD 10
|
||||
let consts := env.const2ModIdx.keysArray
|
||||
let mut suggestions := #[]
|
||||
while suggestions.size < max do
|
||||
let i ← IO.rand 0 consts.size
|
||||
let name := consts[i]!
|
||||
unless isDeniedPremise env name do
|
||||
suggestions := suggestions.push { name := name, score := 1.0 / consts.size.toFloat }
|
||||
return suggestions
|
||||
|
||||
builtin_initialize premiseSelectorExt : EnvExtension (Option Selector) ←
|
||||
registerEnvExtension (pure none)
|
||||
|
||||
/-- Generate premise suggestions for the given metavariable, using the currently registered premise selector. -/
|
||||
def select (m : MVarId) (c : Config := {}) : MetaM (Array Suggestion) := do
|
||||
let some selector := premiseSelectorExt.getState (← getEnv) |
|
||||
throwError "No premise selector registered. \
|
||||
(Note the Lean does not provide a default premise selector, these must be installed by a downstream library.)"
|
||||
selector m c
|
||||
|
||||
/-!
|
||||
Currently the registration mechanism is just global state.
|
||||
This means that if multiple modules register premise selectors,
|
||||
the behaviour will be dependent on the order of loading modules.
|
||||
|
||||
We should replace this with a mechanism so that
|
||||
premise selectors are configured via options in the `lakefile`, and
|
||||
commands are only used to override in a single declaration or file.
|
||||
-/
|
||||
|
||||
/-- Set the current premise selector.-/
|
||||
def registerPremiseSelector (selector : Selector) : CoreM Unit := do
|
||||
modifyEnv fun env => premiseSelectorExt.setState env (some selector)
|
||||
|
||||
open Lean Elab Command in
|
||||
@[builtin_command_elab setPremiseSelectorCmd, inherit_doc setPremiseSelectorCmd]
|
||||
def elabSetPremiseSelector : CommandElab
|
||||
| `(command| set_premise_selector $selector) => do
|
||||
let selector ← liftTermElabM do
|
||||
try
|
||||
let selectorTerm ← Term.elabTermEnsuringType selector (some (Expr.const ``Selector []))
|
||||
unsafe Meta.evalExpr Selector (Expr.const ``Selector []) selectorTerm
|
||||
catch _ =>
|
||||
throwError "Failed to elaborate {selector} as a `MVarId → Config → MetaM (Array Suggestion)`."
|
||||
liftCoreM (registerPremiseSelector selector)
|
||||
| _ => throwUnsupportedSyntax
|
||||
|
||||
open Lean.Elab.Tactic in
|
||||
@[builtin_tactic Lean.Parser.Tactic.suggestPremises] def evalSuggestPremises : Tactic := fun _ =>
|
||||
liftMetaTactic1 fun mvarId => do
|
||||
let suggestions ← select mvarId
|
||||
logInfo m!"Premise suggestions: {suggestions.map (·.name)}"
|
||||
return mvarId
|
||||
|
||||
end Lean.PremiseSelection
|
||||
94
src/Lean/PremiseSelection/MePo.lean
Normal file
94
src/Lean/PremiseSelection/MePo.lean
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
/-
|
||||
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Kim Morrison
|
||||
-/
|
||||
module
|
||||
|
||||
prelude
|
||||
public import Lean.PremiseSelection.Basic
|
||||
import Lean.Util.FoldConsts
|
||||
import Lean.Meta.Basic
|
||||
|
||||
/-!
|
||||
# MePo premise selection
|
||||
|
||||
This is a naive implement of the MePo premise selection algorithm, from
|
||||
"Lightweight relevance filtering for machine-generated resolution problems" by Meng and Paulson.
|
||||
|
||||
It needs to be tuned and evaluated for Lean.
|
||||
-/
|
||||
|
||||
open Lean
|
||||
|
||||
namespace Lean.PremiseSelection.MePo
|
||||
|
||||
def symbolFrequency (env : Environment) : NameMap Nat := Id.run do
|
||||
let mut map := {}
|
||||
for (_, ci) in env.constants do
|
||||
for n' in ci.type.getUsedConstantsAsSet do
|
||||
map := map.alter n' fun i? => some (i?.getD 0 + 1)
|
||||
return map
|
||||
|
||||
def weightedScore (weight : Name → Float) (relevant candidate : NameSet) : Float :=
|
||||
let S := candidate
|
||||
let R := relevant ∩ S
|
||||
let R' := S \ R
|
||||
let M := R.foldl (fun acc n => acc + weight n) 0
|
||||
M / (M + R'.size.toFloat)
|
||||
|
||||
-- This function is taken from the MePo paper and needs to be tuned.
|
||||
def weightFunction (n : Nat) := 1.0 + 2.0 / (n.log2.toFloat + 1.0)
|
||||
|
||||
def frequencyScore (frequency : Name → Nat) (relevant candidate : NameSet) : Float :=
|
||||
weightedScore (fun n => weightFunction (frequency n)) relevant candidate
|
||||
|
||||
def unweightedScore (relevant candidate : NameSet) : Float := weightedScore (fun _ => 1) relevant candidate
|
||||
|
||||
def mepo (initialRelevant : NameSet) (score : NameSet → NameSet → Float) (accept : ConstantInfo → CoreM Bool) (p : Float) (c : Float) : CoreM (Array (Name × Float)) := do
|
||||
let env ← getEnv
|
||||
let mut p := p
|
||||
let mut candidates := #[]
|
||||
let mut relevant := initialRelevant
|
||||
let mut accepted : Array (Name × Float) := {}
|
||||
for (n, ci) in env.constants do
|
||||
if ← accept ci then
|
||||
candidates := candidates.push (n, ci.type.getUsedConstantsAsSet)
|
||||
while candidates.size > 0 do
|
||||
let (newAccepted, candidates') := candidates.map (fun (n, c) => (n, c, score relevant c)) |>.partition fun (_, _, s) => p ≤ s
|
||||
if newAccepted.isEmpty then return accepted
|
||||
accepted := newAccepted.foldl (fun acc (n, _, s) => acc.push (n, s)) accepted
|
||||
candidates := candidates'.map fun (n, c, _) => (n, c)
|
||||
relevant := newAccepted.foldl (fun acc (_, ns, _) => acc ++ ns) relevant
|
||||
p := p + (1 - p) / c
|
||||
return accepted
|
||||
|
||||
open Lean Meta MVarId in
|
||||
def _root_.Lean.MVarId.getConstants (g : MVarId) : MetaM NameSet := withContext g do
|
||||
let mut c := (← g.getType).getUsedConstantsAsSet
|
||||
for t in (← getLocalHyps) do
|
||||
c := c ∪ (← inferType t).getUsedConstantsAsSet
|
||||
return c
|
||||
|
||||
end MePo
|
||||
|
||||
open MePo
|
||||
|
||||
-- The values of p := 0.6 and c := 2.4 are taken from the MePo paper, and need to be tuned.
|
||||
-- When retrieving ≤32 premises for use by downstream automation, Thomas Zhu suggests that c := 0.5 is optimal.
|
||||
public def mepoSelector (useRarity : Bool) (p : Float := 0.6) (c : Float := 2.4) : Selector := fun g config => do
|
||||
let constants ← g.getConstants
|
||||
let env ← getEnv
|
||||
let score := if useRarity then
|
||||
let frequency := symbolFrequency env
|
||||
frequencyScore (frequency.getD · 0)
|
||||
else
|
||||
unweightedScore
|
||||
let accept := fun ci => return !isDeniedPremise env ci.name
|
||||
let suggestions ← mepo constants score accept p c
|
||||
let suggestions := suggestions
|
||||
|>.map (fun (n, s) => { name := n, score := s })
|
||||
|>.reverse -- we favor constants that appear at the end of `env.constants`
|
||||
match config.maxSuggestions with
|
||||
| none => return suggestions
|
||||
| some k => return suggestions.take k
|
||||
26
tests/lean/run/premise_selection_mepo.lean
Normal file
26
tests/lean/run/premise_selection_mepo.lean
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import Lean.PremiseSelection.MePo
|
||||
|
||||
set_premise_selector Lean.PremiseSelection.mepoSelector (useRarity := false)
|
||||
|
||||
example (a b : Int) : a + b = b + a := by
|
||||
suggest_premises
|
||||
sorry
|
||||
|
||||
-- #time
|
||||
example (x y z : List Int) : x ++ y ++ z = x ++ (y ++ z) := by
|
||||
suggest_premises
|
||||
sorry
|
||||
|
||||
-- `useRarity` is too slow in practice: it requires analyzing all the types in the environment.
|
||||
-- It would need to be cached.
|
||||
|
||||
-- set_premise_selector Lean.PremiseSelection.mepoSelector (useRarity := true)
|
||||
|
||||
-- example (a b : Int) : a + b = b + a := by
|
||||
-- suggest_premises
|
||||
-- sorry
|
||||
|
||||
-- #time
|
||||
-- example (x y z : List Int) : x ++ y ++ z = x ++ (y ++ z) := by
|
||||
-- suggest_premises
|
||||
-- sorry
|
||||
Loading…
Add table
Reference in a new issue