From 0807f73171ca8f765c11ef37d69fd95e6613a878 Mon Sep 17 00:00:00 2001 From: Kim Morrison <477956+kim-em@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:40:22 +1000 Subject: [PATCH] feat: basic premise selection algorithm based on MePo (#7844) This PR adds a simple implementation of MePo, from "Lightweight relevance filtering for machine-generated resolution problems" by Meng and Paulson. This needs tuning, but is already useful as a baseline or test case. --------- Co-authored-by: Thomas Zhu --- src/Lean/Data/NameMap.lean | 2 - src/Lean/PremiseSelection.lean | 133 +-------------- src/Lean/PremiseSelection/Basic.lean | 180 +++++++++++++++++++++ src/Lean/PremiseSelection/MePo.lean | 94 +++++++++++ tests/lean/run/premise_selection_mepo.lean | 26 +++ 5 files changed, 302 insertions(+), 133 deletions(-) create mode 100644 src/Lean/PremiseSelection/Basic.lean create mode 100644 src/Lean/PremiseSelection/MePo.lean create mode 100644 tests/lean/run/premise_selection_mepo.lean diff --git a/src/Lean/Data/NameMap.lean b/src/Lean/Data/NameMap.lean index bbd529b6f7..8801e1ec7e 100644 --- a/src/Lean/Data/NameMap.lean +++ b/src/Lean/Data/NameMap.lean @@ -8,5 +8,3 @@ module prelude public import Lean.Data.NameMap.Basic public import Lean.Data.NameMap.AdditionalOperations - -public section diff --git a/src/Lean/PremiseSelection.lean b/src/Lean/PremiseSelection.lean index 47b4ded7e3..08a65c6bbd 100644 --- a/src/Lean/PremiseSelection.lean +++ b/src/Lean/PremiseSelection.lean @@ -6,134 +6,5 @@ Authors: Kim Morrison module prelude -public import Lean.Elab.Command -public import Lean.Meta.Eval -public import Lean.Meta.CompletionName -public import Init.Data.Random - -public section - -/-! -# An API for premise selection algorithms. - -This module provides a basic API for premise selection algorithms, -which are used to suggest identifiers that should be introduced in a proof. - -The core interface is the `Selector` type, which is a function from a metavariable -and a configuration to a list of suggestions. - -The `Selector` is registered as an environment extension, and the trivial (no suggestions) implementation -is `Lean.PremiseSelection.empty`. - -Lean does not provide a default premise selector, so this module is intended to be used in conjunction -with a downstream package which registers a premise selector. --/ - -namespace Lean.PremiseSelection - -/-- -A `Suggestion` is essentially just an identifier and a confidence score that the identifier is relevant. -If the premise selection request included information about the intended use (e.g. in the simplifier, in `grind`, etc.) -the score may be adjusted for that application. - --/ -structure Suggestion where - name : Name - /-- - The score of the suggestion, as a probability that this suggestion should be used. - -/ - score : Float - -structure Config where - /-- - The maximum number of suggestions to return. - -/ - maxSuggestions : Option Nat := none - /-- - The tactic that is calling the premise selection, e.g. `simp`, `grind`, or `aesop`. - This may be used to adjust the score of the suggestions - -/ - caller : Option Name := none - /-- - A filter on suggestions; only suggestions returning `true` should be returned. - (It can be better to filter on the premise selection side, to ensure that enough suggestions are returned.) - -/ - filter : Name → MetaM Bool := fun _ => pure true - /-- - An optional arbitrary "hint" to the premise selection algorithm. - There is no guarantee that the algorithm will make any use of the hint. - - Potential use cases include a natural language comment provided by the user - (e.g. allowing use of the premise selector as a search engine) - or including context from the current proof and/or file. - - We may later split these use cases into separate fields if necessary. - -/ - hint : Option String := none - -abbrev Selector : Type := MVarId → Config → MetaM (Array Suggestion) - -/-- -The trivial premise selector, which returns no suggestions. --/ -def empty : Selector := fun _ _ => pure #[] - -/-- A random premise selection algorithm, provided solely for testing purposes. -/ -def random (gen : StdGen := ⟨37, 59⟩) : Selector := fun _ cfg => do - IO.stdGenRef.set gen - let env ← getEnv - let max := cfg.maxSuggestions.getD 10 - let consts := env.const2ModIdx.keysArray - let mut suggestions := #[] - while suggestions.size < max do - let i ← IO.rand 0 consts.size - let name := consts[i]! - if ! (`Lean).isPrefixOf name && Lean.Meta.allowCompletion env name then - suggestions := suggestions.push { name := name, score := 1.0 / consts.size.toFloat } - return suggestions - -initialize premiseSelectorExt : EnvExtension (Option Selector) ← - registerEnvExtension (pure none) - -/-- Generate premise suggestions for the given metavariable, using the currently registered premise selector. -/ -def select (m : MVarId) (c : Config := {}) : MetaM (Array Suggestion) := do - let some selector := premiseSelectorExt.getState (← getEnv) | - throwError "No premise selector registered. \ - (Note the Lean does not provide a default premise selector, these must be installed by a downstream library.)" - selector m c - -/-! -Currently the registration mechanism is just global state. -This means that if multiple modules register premise selectors, -the behaviour will be dependent on the order of loading modules. - -We should replace this with a mechanism so that -premise selectors are configured via options in the `lakefile`, and -commands are only used to override in a single declaration or file. --/ - -/-- Set the current premise selector.-/ -def registerPremiseSelector (selector : Selector) : CoreM Unit := do - modifyEnv fun env => premiseSelectorExt.setState env (some selector) - -open Lean Elab Command in -@[builtin_command_elab setPremiseSelectorCmd, inherit_doc setPremiseSelectorCmd] -def elabSetPremiseSelector : CommandElab - | `(command| set_premise_selector $selector) => do - let selector ← liftTermElabM do - try - let selectorTerm ← Term.elabTermEnsuringType selector (some (Expr.const ``Selector [])) - unsafe Meta.evalExpr Selector (Expr.const ``Selector []) selectorTerm - catch _ => - throwError "Failed to elaborate {selector} as a `MVarId → Config → MetaM (Array Suggestion)`." - liftCoreM (registerPremiseSelector selector) - | _ => throwUnsupportedSyntax - -open Lean.Elab.Tactic in -@[builtin_tactic Lean.Parser.Tactic.suggestPremises] def evalSuggestPremises : Tactic := fun _ => - liftMetaTactic1 fun mvarId => do - let suggestions ← select mvarId - logInfo m!"Premise suggestions: {suggestions.map (·.name)}" - return mvarId - -end Lean.PremiseSelection +import Lean.PremiseSelection.Basic +import Lean.PremiseSelection.MePo diff --git a/src/Lean/PremiseSelection/Basic.lean b/src/Lean/PremiseSelection/Basic.lean new file mode 100644 index 0000000000..1441c2f0d1 --- /dev/null +++ b/src/Lean/PremiseSelection/Basic.lean @@ -0,0 +1,180 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Kim Morrison +-/ +module + +prelude +public import Lean.Elab.Command +public import Lean.Meta.Eval +public import Lean.Meta.CompletionName +public import Init.Data.Random + +/-! +# An API for premise selection algorithms. + +This module provides a basic API for premise selection algorithms, +which are used to suggest identifiers that should be introduced in a proof. + +The core interface is the `Selector` type, which is a function from a metavariable +and a configuration to a list of suggestions. + +The `Selector` is registered as an environment extension, and the trivial (no suggestions) implementation +is `Lean.PremiseSelection.empty`. + +Lean does not provide a default premise selector, so this module is intended to be used in conjunction +with a downstream package which registers a premise selector. +-/ + +@[expose] public section + +namespace Lean.PremiseSelection + +/-- +A `Suggestion` is essentially just an identifier and a confidence score that the identifier is relevant. +If the premise selection request included information about the intended use (e.g. in the simplifier, in `grind`, etc.) +the score may be adjusted for that application. + +-/ +structure Suggestion where + name : Name + /-- + The score of the suggestion, as a probability that this suggestion should be used. + -/ + score : Float + +structure Config where + /-- + The maximum number of suggestions to return. + -/ + maxSuggestions : Option Nat := none + /-- + The tactic that is calling the premise selection, e.g. `simp`, `grind`, or `aesop`. + This may be used to adjust the score of the suggestions + -/ + caller : Option Name := none + /-- + A filter on suggestions; only suggestions returning `true` should be returned. + (It can be better to filter on the premise selection side, to ensure that enough suggestions are returned.) + -/ + filter : Name → MetaM Bool := fun _ => pure true + /-- + An optional arbitrary "hint" to the premise selection algorithm. + There is no guarantee that the algorithm will make any use of the hint. + + Potential use cases include a natural language comment provided by the user + (e.g. allowing use of the premise selector as a search engine) + or including context from the current proof and/or file. + + We may later split these use cases into separate fields if necessary. + -/ + hint : Option String := none + +abbrev Selector : Type := MVarId → Config → MetaM (Array Suggestion) + +section DenyList + +/-- Premises from a module whose name has one of the following components are not retrieved. -/ +builtin_initialize moduleDenyListExt : SimplePersistentEnvExtension String (List String) ← + registerSimplePersistentEnvExtension { + addEntryFn := (·.cons) + addImportedFn := mkStateFromImportedEntries (·.cons) [] + } + +/-- A premise whose name has one of the following components is not retrieved. -/ +builtin_initialize nameDenyListExt : SimplePersistentEnvExtension String (List String) ← + registerSimplePersistentEnvExtension { + addEntryFn := (·.cons) + addImportedFn := mkStateFromImportedEntries (·.cons) [] + } + +/-- A premise whose `type.getForallBody.getAppFn` is a constant that has one of these prefixes is not retrieved. -/ +builtin_initialize typePrefixDenyListExt : SimplePersistentEnvExtension Name (List Name) ← + registerSimplePersistentEnvExtension { + addEntryFn := (·.cons) + addImportedFn := mkStateFromImportedEntries (·.cons) [] + } + +def isDeniedModule (env : Environment) (moduleName : Name) : Bool := + (moduleDenyListExt.getState env).any fun p => moduleName.anyS (· == p) + +def isDeniedPremise (env : Environment) (name : Name) : Bool := Id.run do + if name == ``sorryAx then return true + if name.isInternalDetail then return true + if (nameDenyListExt.getState env).any (fun p => name.anyS (· == p)) then return true + if let some moduleIdx := env.getModuleIdxFor? name then + let moduleName := env.header.moduleNames[moduleIdx.toNat]! + if isDeniedModule env moduleName then + return true + let some ci := env.find? name | return true + if let .const fnName _ := ci.type.getForallBody.getAppFn then + if (typePrefixDenyListExt.getState env).any (fun p => p.isPrefixOf fnName) then return true + return false + +end DenyList + +/-- +The trivial premise selector, which returns no suggestions. +-/ +def empty : Selector := fun _ _ => pure #[] + +/-- A random premise selection algorithm, provided solely for testing purposes. -/ +def random (gen : StdGen := ⟨37, 59⟩) : Selector := fun _ cfg => do + IO.stdGenRef.set gen + let env ← getEnv + let max := cfg.maxSuggestions.getD 10 + let consts := env.const2ModIdx.keysArray + let mut suggestions := #[] + while suggestions.size < max do + let i ← IO.rand 0 consts.size + let name := consts[i]! + unless isDeniedPremise env name do + suggestions := suggestions.push { name := name, score := 1.0 / consts.size.toFloat } + return suggestions + +builtin_initialize premiseSelectorExt : EnvExtension (Option Selector) ← + registerEnvExtension (pure none) + +/-- Generate premise suggestions for the given metavariable, using the currently registered premise selector. -/ +def select (m : MVarId) (c : Config := {}) : MetaM (Array Suggestion) := do + let some selector := premiseSelectorExt.getState (← getEnv) | + throwError "No premise selector registered. \ + (Note the Lean does not provide a default premise selector, these must be installed by a downstream library.)" + selector m c + +/-! +Currently the registration mechanism is just global state. +This means that if multiple modules register premise selectors, +the behaviour will be dependent on the order of loading modules. + +We should replace this with a mechanism so that +premise selectors are configured via options in the `lakefile`, and +commands are only used to override in a single declaration or file. +-/ + +/-- Set the current premise selector.-/ +def registerPremiseSelector (selector : Selector) : CoreM Unit := do + modifyEnv fun env => premiseSelectorExt.setState env (some selector) + +open Lean Elab Command in +@[builtin_command_elab setPremiseSelectorCmd, inherit_doc setPremiseSelectorCmd] +def elabSetPremiseSelector : CommandElab + | `(command| set_premise_selector $selector) => do + let selector ← liftTermElabM do + try + let selectorTerm ← Term.elabTermEnsuringType selector (some (Expr.const ``Selector [])) + unsafe Meta.evalExpr Selector (Expr.const ``Selector []) selectorTerm + catch _ => + throwError "Failed to elaborate {selector} as a `MVarId → Config → MetaM (Array Suggestion)`." + liftCoreM (registerPremiseSelector selector) + | _ => throwUnsupportedSyntax + +open Lean.Elab.Tactic in +@[builtin_tactic Lean.Parser.Tactic.suggestPremises] def evalSuggestPremises : Tactic := fun _ => + liftMetaTactic1 fun mvarId => do + let suggestions ← select mvarId + logInfo m!"Premise suggestions: {suggestions.map (·.name)}" + return mvarId + +end Lean.PremiseSelection diff --git a/src/Lean/PremiseSelection/MePo.lean b/src/Lean/PremiseSelection/MePo.lean new file mode 100644 index 0000000000..d781c0fc06 --- /dev/null +++ b/src/Lean/PremiseSelection/MePo.lean @@ -0,0 +1,94 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Kim Morrison +-/ +module + +prelude +public import Lean.PremiseSelection.Basic +import Lean.Util.FoldConsts +import Lean.Meta.Basic + +/-! +# MePo premise selection + +This is a naive implement of the MePo premise selection algorithm, from +"Lightweight relevance filtering for machine-generated resolution problems" by Meng and Paulson. + +It needs to be tuned and evaluated for Lean. +-/ + +open Lean + +namespace Lean.PremiseSelection.MePo + +def symbolFrequency (env : Environment) : NameMap Nat := Id.run do + let mut map := {} + for (_, ci) in env.constants do + for n' in ci.type.getUsedConstantsAsSet do + map := map.alter n' fun i? => some (i?.getD 0 + 1) + return map + +def weightedScore (weight : Name → Float) (relevant candidate : NameSet) : Float := + let S := candidate + let R := relevant ∩ S + let R' := S \ R + let M := R.foldl (fun acc n => acc + weight n) 0 + M / (M + R'.size.toFloat) + +-- This function is taken from the MePo paper and needs to be tuned. +def weightFunction (n : Nat) := 1.0 + 2.0 / (n.log2.toFloat + 1.0) + +def frequencyScore (frequency : Name → Nat) (relevant candidate : NameSet) : Float := + weightedScore (fun n => weightFunction (frequency n)) relevant candidate + +def unweightedScore (relevant candidate : NameSet) : Float := weightedScore (fun _ => 1) relevant candidate + +def mepo (initialRelevant : NameSet) (score : NameSet → NameSet → Float) (accept : ConstantInfo → CoreM Bool) (p : Float) (c : Float) : CoreM (Array (Name × Float)) := do + let env ← getEnv + let mut p := p + let mut candidates := #[] + let mut relevant := initialRelevant + let mut accepted : Array (Name × Float) := {} + for (n, ci) in env.constants do + if ← accept ci then + candidates := candidates.push (n, ci.type.getUsedConstantsAsSet) + while candidates.size > 0 do + let (newAccepted, candidates') := candidates.map (fun (n, c) => (n, c, score relevant c)) |>.partition fun (_, _, s) => p ≤ s + if newAccepted.isEmpty then return accepted + accepted := newAccepted.foldl (fun acc (n, _, s) => acc.push (n, s)) accepted + candidates := candidates'.map fun (n, c, _) => (n, c) + relevant := newAccepted.foldl (fun acc (_, ns, _) => acc ++ ns) relevant + p := p + (1 - p) / c + return accepted + +open Lean Meta MVarId in +def _root_.Lean.MVarId.getConstants (g : MVarId) : MetaM NameSet := withContext g do + let mut c := (← g.getType).getUsedConstantsAsSet + for t in (← getLocalHyps) do + c := c ∪ (← inferType t).getUsedConstantsAsSet + return c + +end MePo + +open MePo + +-- The values of p := 0.6 and c := 2.4 are taken from the MePo paper, and need to be tuned. +-- When retrieving ≤32 premises for use by downstream automation, Thomas Zhu suggests that c := 0.5 is optimal. +public def mepoSelector (useRarity : Bool) (p : Float := 0.6) (c : Float := 2.4) : Selector := fun g config => do + let constants ← g.getConstants + let env ← getEnv + let score := if useRarity then + let frequency := symbolFrequency env + frequencyScore (frequency.getD · 0) + else + unweightedScore + let accept := fun ci => return !isDeniedPremise env ci.name + let suggestions ← mepo constants score accept p c + let suggestions := suggestions + |>.map (fun (n, s) => { name := n, score := s }) + |>.reverse -- we favor constants that appear at the end of `env.constants` + match config.maxSuggestions with + | none => return suggestions + | some k => return suggestions.take k diff --git a/tests/lean/run/premise_selection_mepo.lean b/tests/lean/run/premise_selection_mepo.lean new file mode 100644 index 0000000000..3d6abf3002 --- /dev/null +++ b/tests/lean/run/premise_selection_mepo.lean @@ -0,0 +1,26 @@ +import Lean.PremiseSelection.MePo + +set_premise_selector Lean.PremiseSelection.mepoSelector (useRarity := false) + +example (a b : Int) : a + b = b + a := by + suggest_premises + sorry + +-- #time +example (x y z : List Int) : x ++ y ++ z = x ++ (y ++ z) := by + suggest_premises + sorry + +-- `useRarity` is too slow in practice: it requires analyzing all the types in the environment. +-- It would need to be cached. + +-- set_premise_selector Lean.PremiseSelection.mepoSelector (useRarity := true) + +-- example (a b : Int) : a + b = b + a := by +-- suggest_premises +-- sorry + +-- #time +-- example (x y z : List Int) : x ++ y ++ z = x ++ (y ++ z) := by +-- suggest_premises +-- sorry