fix: add Canonicalizer.lean and use it to canonicalize terms in omega (#3639)
This commit is contained in:
parent
07dac67847
commit
5aca09abca
4 changed files with 168 additions and 2 deletions
|
|
@ -9,6 +9,7 @@ import Init.Omega.Int
|
|||
import Init.Omega.Logic
|
||||
import Init.Data.BitVec.Basic
|
||||
import Lean.Meta.AppBuilder
|
||||
import Lean.Meta.Canonicalizer
|
||||
|
||||
/-!
|
||||
# The `OmegaM` state monad.
|
||||
|
|
@ -54,7 +55,7 @@ structure State where
|
|||
atoms : HashMap Expr Nat := {}
|
||||
|
||||
/-- An intermediate layer in the `OmegaM` monad. -/
|
||||
abbrev OmegaM' := StateRefT State (ReaderT Context MetaM)
|
||||
abbrev OmegaM' := StateRefT State (ReaderT Context CanonM)
|
||||
|
||||
/--
|
||||
Cache of expressions that have been visited, and their reflection as a linear combination.
|
||||
|
|
@ -70,7 +71,7 @@ abbrev OmegaM := StateRefT Cache OmegaM'
|
|||
|
||||
/-- Run a computation in the `OmegaM` monad, starting with no recorded atoms. -/
|
||||
def OmegaM.run (m : OmegaM α) (cfg : OmegaConfig) : MetaM α :=
|
||||
m.run' HashMap.empty |>.run' {} { cfg }
|
||||
m.run' HashMap.empty |>.run' {} { cfg } |>.run
|
||||
|
||||
/-- Retrieve the user-specified configuration options. -/
|
||||
def cfg : OmegaM OmegaConfig := do pure (← read).cfg
|
||||
|
|
@ -244,6 +245,7 @@ Return its index, and, if it is new, a collection of interesting facts about the
|
|||
-/
|
||||
def lookup (e : Expr) : OmegaM (Nat × Option (HashSet Expr)) := do
|
||||
let c ← getThe State
|
||||
let e ← canon e
|
||||
match c.atoms.find? e with
|
||||
| some i => return (i, none)
|
||||
| none =>
|
||||
|
|
|
|||
|
|
@ -48,3 +48,4 @@ import Lean.Meta.Iterator
|
|||
import Lean.Meta.LazyDiscrTree
|
||||
import Lean.Meta.LitValues
|
||||
import Lean.Meta.CheckTactic
|
||||
import Lean.Meta.Canonicalizer
|
||||
|
|
|
|||
146
src/Lean/Meta/Canonicalizer.lean
Normal file
146
src/Lean/Meta/Canonicalizer.lean
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
/-
|
||||
Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Leonardo de Moura
|
||||
-/
|
||||
prelude
|
||||
import Lean.Util.ShareCommon
|
||||
import Lean.Data.HashMap
|
||||
import Lean.Meta.Basic
|
||||
import Lean.Meta.FunInfo
|
||||
|
||||
namespace Lean.Meta
|
||||
namespace Canonicalizer
|
||||
|
||||
/-!
|
||||
Applications have implicit arguments. Thus, two terms that may look identical when pretty-printed can be structurally different.
|
||||
For example, `@id (Id Nat) x` and `@id Nat x` are structurally different but are both pretty-printed as `id x`.
|
||||
Moreover, these two terms are definitionally equal since `Id Nat` reduces to `Nat`. This may create situations
|
||||
that are counterintuitive to our users. Furthermore, several tactics (e.g., `omega`) need to collect unique atoms in a goal.
|
||||
One simple approach is to maintain a list of atoms found so far, and whenever a new atom is discovered, perform a
|
||||
linear scan to test whether it is definitionally equal to a previously found one. However, this method is too costly,
|
||||
even if the definitional equality test were inexpensive.
|
||||
|
||||
This module aims to efficiently identify terms that are structurally different, definitionally equal, and structurally equal
|
||||
when we disregard implicit arguments like `@id (Id Nat) x` and `@id Nat x`. The procedure is straightforward. For each atom,
|
||||
we create a new abstracted atom by erasing all implicit information. We refer to this abstracted atom as a 'key.' For the two
|
||||
terms mentioned, the key would be `@id _ x`, where `_` denotes a placeholder for a dummy term. To preserve any
|
||||
pre-existing directed acyclic graph (DAG) structure and prevent exponential blowups while constructing the key, we employ
|
||||
unsafe techniques, such as pointer equality. Additionally, we maintain a mapping from keys to lists of terms, where each
|
||||
list contains terms sharing the same key but not definitionally equal. We posit that these lists will be small in practice.
|
||||
-/
|
||||
|
||||
/--
|
||||
Auxiliary structure for creating a pointer-equality mapping from `Expr` to `Key`.
|
||||
We use this mapping to ensure we preserve the dag-structure of input expressions.
|
||||
-/
|
||||
structure ExprVisited where
|
||||
e : Expr
|
||||
deriving Inhabited
|
||||
|
||||
unsafe instance : BEq ExprVisited where
|
||||
beq a b := ptrAddrUnsafe a == ptrAddrUnsafe b
|
||||
|
||||
unsafe instance : Hashable ExprVisited where
|
||||
hash a := USize.toUInt64 (ptrAddrUnsafe a)
|
||||
|
||||
abbrev Key := ExprVisited
|
||||
|
||||
/--
|
||||
State for the `CanonM` monad.
|
||||
-/
|
||||
structure State where
|
||||
/-- "Set" of all keys created so far. This is a hash-consing helper structure available in Lean. -/
|
||||
keys : ShareCommon.State.{0} Lean.ShareCommon.objectFactory := ShareCommon.State.mk Lean.ShareCommon.objectFactory
|
||||
/-- Mapping from `Expr` to `Key`. See comment at `ExprVisited`. -/
|
||||
-- We use `HashMapImp` to ensure we don't have to tag `State` as `unsafe`.
|
||||
cache : HashMapImp ExprVisited Key := mkHashMapImp
|
||||
/--
|
||||
Given a key `k` and `keyToExprs.find? k = some es`, we have that all `es` share key `k`, and
|
||||
are not definitionally equal modulo the transparency setting used. -/
|
||||
keyToExprs : HashMapImp Key (List Expr) := mkHashMapImp
|
||||
|
||||
instance : Inhabited State where
|
||||
default := {}
|
||||
|
||||
abbrev CanonM := ReaderT TransparencyMode $ StateRefT State MetaM
|
||||
|
||||
/--
|
||||
The definitionally equality tests are performed using the given transparency mode.
|
||||
We claim `TransparencyMode.instances` is a good setting for most applications.
|
||||
-/
|
||||
def CanonM.run (x : CanonM α) (transparency := TransparencyMode.instances) : MetaM α :=
|
||||
StateRefT'.run' (x transparency) {}
|
||||
|
||||
private def shareCommon (a : α) : CanonM α :=
|
||||
modifyGet fun { keys, cache, keyToExprs } =>
|
||||
let (a, keys) := ShareCommon.State.shareCommon keys a
|
||||
(a, { keys, cache, keyToExprs })
|
||||
|
||||
private partial def mkKey (e : Expr) : CanonM Key := do
|
||||
if let some key := unsafe (← get).cache.find? { e } then
|
||||
return key
|
||||
else
|
||||
let key ← match e with
|
||||
| .sort .. | .fvar .. | .bvar .. | .const .. | .lit .. =>
|
||||
pure { e := (← shareCommon e) }
|
||||
| .mvar .. =>
|
||||
-- We instantiate assigned metavariables because the
|
||||
-- pretty-printer also instantiates them.
|
||||
let eNew ← instantiateMVars e
|
||||
if eNew == e then pure { e := (← shareCommon e) }
|
||||
else mkKey eNew
|
||||
| .mdata _ a => mkKey a
|
||||
| .app .. =>
|
||||
let f := (← mkKey e.getAppFn).e
|
||||
if f.isMVar then
|
||||
let eNew ← instantiateMVars e
|
||||
unless eNew == e do
|
||||
return (← mkKey eNew)
|
||||
let info ← getFunInfo f
|
||||
let args ← e.getAppArgs.mapIdxM fun i arg => do
|
||||
if h : i < info.paramInfo.size then
|
||||
let info := info.paramInfo[i]
|
||||
if info.isExplicit then
|
||||
pure (← mkKey arg).e
|
||||
else
|
||||
pure (mkSort 0) -- some dummy value for erasing implicit
|
||||
else
|
||||
pure (← mkKey arg).e
|
||||
pure { e := (← shareCommon (mkAppN f args)) }
|
||||
| .lam n t b i =>
|
||||
pure { e := (← shareCommon (.lam n (← mkKey t).e (← mkKey b).e i)) }
|
||||
| .forallE n t b i =>
|
||||
pure { e := (← shareCommon (.forallE n (← mkKey t).e (← mkKey b).e i)) }
|
||||
| .letE n t v b d =>
|
||||
pure { e := (← shareCommon (.letE n (← mkKey t).e (← mkKey v).e (← mkKey b).e d)) }
|
||||
| .proj t i s =>
|
||||
pure { e := (← shareCommon (.proj t i (← mkKey s).e)) }
|
||||
unsafe modify fun { keys, cache, keyToExprs} => { keys, keyToExprs, cache := cache.insert { e } key |>.1 }
|
||||
return key
|
||||
|
||||
/--
|
||||
"Canonicalize" the given expression.
|
||||
-/
|
||||
def canon (e : Expr) : CanonM Expr := do
|
||||
let k ← mkKey e
|
||||
-- Find all expressions canonicalized before that have the same key.
|
||||
if let some es' := unsafe (← get).keyToExprs.find? k then
|
||||
withTransparency (← read) do
|
||||
for e' in es' do
|
||||
-- Found an expression `e'` that is definitionally equal to `e` and share the same key.
|
||||
if (← isDefEq e e') then
|
||||
return e'
|
||||
-- `e` is not definitionally equal to any expression in `es'`. We claim this should be rare.
|
||||
unsafe modify fun { keys, cache, keyToExprs } => { keys, cache, keyToExprs := keyToExprs.insert k (e :: es') |>.1 }
|
||||
return e
|
||||
else
|
||||
-- `e` is the first expression we found with key `k`.
|
||||
unsafe modify fun { keys, cache, keyToExprs } => { keys, cache, keyToExprs := keyToExprs.insert k [e] |>.1 }
|
||||
return e
|
||||
|
||||
end Canonicalizer
|
||||
|
||||
export Canonicalizer (CanonM canon)
|
||||
|
||||
end Lean.Meta
|
||||
17
tests/lean/run/omegaCanon.lean
Normal file
17
tests/lean/run/omegaCanon.lean
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
def filter (p : α → Prop) [inst : DecidablePred p] (xs : List α) : List α :=
|
||||
match xs with
|
||||
| [] => []
|
||||
| x :: xs' =>
|
||||
if p x then
|
||||
-- Trying to confuse `omega` by creating subterms that are structurally different
|
||||
-- but definitionally equal.
|
||||
x :: @filter α p (fun x => inst x) xs'
|
||||
else
|
||||
filter p xs'
|
||||
|
||||
def filter_length (p : α → Prop) [DecidablePred p] : (filter p xs).length ≤ xs.length := by
|
||||
induction xs with
|
||||
| nil => simp [filter]
|
||||
| cons x xs ih =>
|
||||
simp only [filter]
|
||||
split <;> simp only [List.length] <;> omega
|
||||
Loading…
Add table
Reference in a new issue