A more restrictive but efficient max sharing primitive. **Motivation:** Some software verification proofs may contain significant redundancy that can be eliminated using hash-consing (also known as `shareCommon`). For example, [theorem `sha512_block_armv8_test_4_sym`](460fe5d74c/Proofs/SHA512/SHA512Sym.lean (L29)) took a few seconds at [`addPreDefinitions` ](1a12f63f74/src/Lean/Elab/PreDefinition/Main.lean (L155)) and one second at `fixLevelParams` on a MacBook Pro (with M1 Pro). The proof term initially had over 16 million subterms, but the redundancy was indirectly and inefficiently eliminated using `Core.transform` at `addPreDefinitions`. I tried to use `shareCommon` method to fix the performance issue, but it was too inefficient. This PR introduces a new `shareCommon'` method that, although less flexible (e.g., it uses only a local cache and hash-consing table), is much more efficient. The new procedure minimizes the number of RC operations and optimizes the caching strategy. It is 20 times faster than the old `shareCommon` procedure for theorem `sha512_block_armv8_test_4_sym`.
461 lines
17 KiB
C++
461 lines
17 KiB
C++
/*
|
||
Copyright (c) 2020 Microsoft Corporation. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
|
||
Author: Leonardo de Moura
|
||
*/
|
||
#include <vector>
|
||
#include <cstring>
|
||
#include <unordered_map>
|
||
#include <unordered_set>
|
||
#include "runtime/object.h"
|
||
#include "runtime/hash.h"
|
||
|
||
namespace lean {
|
||
|
||
extern "C" LEAN_EXPORT uint8 lean_sharecommon_eq(b_obj_arg o1, b_obj_arg o2) {
|
||
lean_assert(!lean_is_scalar(o1));
|
||
lean_assert(!lean_is_scalar(o2));
|
||
size_t sz1 = lean_object_byte_size(o1);
|
||
size_t sz2 = lean_object_byte_size(o2);
|
||
if (sz1 != sz2) return false;
|
||
// compare relevant parts of the header
|
||
if (lean_ptr_tag(o1) != lean_ptr_tag(o2)) return false;
|
||
if (lean_ptr_other(o1) != lean_ptr_other(o2)) return false;
|
||
size_t header_sz = sizeof(lean_object);
|
||
lean_assert(sz1 >= header_sz);
|
||
// compare objects' bodies
|
||
return memcmp(reinterpret_cast<char*>(o1) + header_sz, reinterpret_cast<char*>(o2) + header_sz, sz1 - header_sz) == 0;
|
||
}
|
||
|
||
extern "C" LEAN_EXPORT uint64_t lean_sharecommon_hash(b_obj_arg o) {
|
||
lean_assert(!lean_is_scalar(o));
|
||
size_t sz = lean_object_byte_size(o);
|
||
size_t header_sz = sizeof(lean_object);
|
||
// hash relevant parts of the header
|
||
unsigned init = hash(lean_ptr_tag(o), lean_ptr_other(o));
|
||
// hash body
|
||
return hash_str(sz - header_sz, reinterpret_cast<unsigned char const *>(o) + header_sz, init);
|
||
}
|
||
|
||
static obj_res mk_pair(obj_arg a, obj_arg b) {
|
||
object * r = alloc_cnstr(0, 2, 0);
|
||
lean_ctor_set(r, 0, a);
|
||
lean_ctor_set(r, 1, b);
|
||
// std::cout << "mk_pair " << a << " " << b << std::endl;
|
||
return r;
|
||
}
|
||
|
||
class sharecommon_state {
|
||
protected:
|
||
object * m_map_find;
|
||
object * m_map_insert;
|
||
object * m_set_find;
|
||
object * m_set_insert;
|
||
object * m_map;
|
||
object * m_set;
|
||
public:
|
||
sharecommon_state(b_obj_arg tc, obj_arg s) {
|
||
m_map_find = lean_ctor_get(tc, 1);
|
||
m_map_insert = lean_ctor_get(tc, 2);
|
||
m_set_find = lean_ctor_get(tc, 3);
|
||
m_set_insert = lean_ctor_get(tc, 4);
|
||
m_map = lean_ctor_get(s, 0); lean_inc(m_map);
|
||
m_set = lean_ctor_get(s, 1); lean_inc(m_set);
|
||
// std::cout << "sharecommon_state " << m_map << " " << m_set << std::endl;
|
||
lean_dec(s);
|
||
}
|
||
|
||
~sharecommon_state() {
|
||
lean_dec(m_map);
|
||
lean_dec(m_set);
|
||
}
|
||
|
||
obj_res pack(obj_arg a) {
|
||
obj_res r = mk_pair(a, mk_pair(m_map, m_set));
|
||
m_map = lean_box(0);
|
||
m_set = lean_box(0);
|
||
return r;
|
||
}
|
||
|
||
obj_res map_find(b_obj_arg k) {
|
||
lean_inc(m_map_find); lean_inc(m_map); lean_inc(k);
|
||
return lean_apply_2(m_map_find, m_map, k);
|
||
}
|
||
|
||
void map_insert(obj_arg k, obj_arg v) {
|
||
lean_inc(m_map_insert);
|
||
m_map = lean_apply_3(m_map_insert, m_map, k, v);
|
||
}
|
||
|
||
obj_res set_find(b_obj_arg o) {
|
||
lean_inc(m_set_find); lean_inc(m_set); lean_inc(o);
|
||
return lean_apply_2(m_set_find, m_set, o);
|
||
}
|
||
|
||
void set_insert(obj_arg o) {
|
||
lean_inc(m_set_insert);
|
||
m_set = lean_apply_2(m_set_insert, m_set, o);
|
||
}
|
||
};
|
||
|
||
class sharecommon_fn {
|
||
sharecommon_state m_state;
|
||
std::vector<lean_object*> m_children;
|
||
std::vector<lean_object*> m_todo;
|
||
|
||
void clear_children() {
|
||
m_children.clear();
|
||
}
|
||
|
||
bool push_child(b_obj_arg a) {
|
||
if (lean_is_scalar(a)) {
|
||
m_children.push_back(a);
|
||
return true;
|
||
}
|
||
switch (lean_ptr_tag(a)) {
|
||
case LeanReserved:
|
||
lean_unreachable();
|
||
// We do not maximize sharing for the following kinds of objects
|
||
case LeanMPZ: case LeanThunk:
|
||
case LeanTask: case LeanRef:
|
||
case LeanExternal: case LeanClosure:
|
||
m_children.push_back(a);
|
||
return true;
|
||
default:
|
||
break;
|
||
}
|
||
|
||
// Check whether we have already maximized sharing for `a`
|
||
obj_res o = m_state.map_find(a);
|
||
if (o != lean_box(0)) {
|
||
obj_res r = lean_ctor_get(o, 0);
|
||
lean_dec(o);
|
||
// The map still has a reference to `r`
|
||
m_children.push_back(r);
|
||
// std::cout << "cached maximized " << r << "\n";
|
||
return true;
|
||
}
|
||
|
||
m_todo.push_back(a);
|
||
return false;
|
||
}
|
||
|
||
void save(b_obj_arg a, obj_arg new_a) {
|
||
lean_assert(m_todo.size() > 0);
|
||
lean_assert(m_todo.back() == a);
|
||
m_todo.pop_back();
|
||
obj_res opt_new_r = m_state.set_find(new_a);
|
||
if (opt_new_r != lean_box(0)) {
|
||
lean_dec(new_a); // we already have a maximally shared term equivalent to `new_a`
|
||
new_a = lean_ctor_get(opt_new_r, 0);
|
||
lean_inc(new_a);
|
||
lean_dec(opt_new_r);
|
||
lean_inc(a);
|
||
m_state.map_insert(a, new_a);
|
||
// std::cout << "already maximized " << new_a << "\n";
|
||
} else {
|
||
lean_inc(a);
|
||
lean_inc_n(new_a, 3);
|
||
m_state.set_insert(new_a); // `new_a` is a new maximally shared term
|
||
m_state.map_insert(a, new_a); // `new_a` is the maximally shared representation for `a`
|
||
m_state.map_insert(new_a, new_a); // `new_a` is the maximally shared representation for itself
|
||
// std::cout << "new maximized " << new_a << "\n";
|
||
}
|
||
}
|
||
|
||
void visit_array(b_obj_arg a) {
|
||
clear_children();
|
||
bool missing_children = false;
|
||
size_t sz = array_size(a);
|
||
for (size_t i = 0; i < sz; i++) {
|
||
if (!push_child(lean_array_get_core(a, i))) {
|
||
missing_children = true;
|
||
}
|
||
}
|
||
if (missing_children)
|
||
return;
|
||
lean_array_object * new_a = (lean_array_object*)lean_alloc_array(sz, sz);
|
||
for (size_t i = 0; i < sz; i++) {
|
||
lean_inc(m_children[i]);
|
||
lean_array_set_core((lean_object*)new_a, i, m_children[i]);
|
||
}
|
||
save(a, (lean_object*)new_a);
|
||
}
|
||
|
||
void visit_sarray(b_obj_arg a) {
|
||
size_t sz = lean_sarray_size(a);
|
||
unsigned elem_sz = lean_sarray_elem_size(a);
|
||
lean_sarray_object * new_a = (lean_sarray_object*)lean_alloc_sarray(elem_sz, sz, sz);
|
||
memcpy(new_a->m_data, lean_to_sarray(a)->m_data, elem_sz*sz);
|
||
save(a, (lean_object*)new_a);
|
||
}
|
||
|
||
void visit_string(b_obj_arg a) {
|
||
size_t sz = lean_string_size(a);
|
||
size_t len = lean_string_len(a);
|
||
lean_string_object * new_a = (lean_string_object*)lean_alloc_string(sz, sz, len);
|
||
lean_set_st_header((lean_object*)new_a, LeanString, 0);
|
||
new_a->m_size = sz;
|
||
new_a->m_capacity = sz;
|
||
new_a->m_length = len;
|
||
memcpy(new_a->m_data, lean_to_string(a)->m_data, sz);
|
||
save(a, (lean_object*)new_a);
|
||
}
|
||
|
||
void visit_ctor(b_obj_arg a) {
|
||
clear_children();
|
||
unsigned num_objs = lean_ctor_num_objs(a);
|
||
bool missing_child = false;
|
||
for (unsigned i = 0; i < num_objs; i++) {
|
||
if (!push_child(lean_ctor_get(a, i))) {
|
||
// std::cout << "missing_child " << a << " #" << i << " := " << lean_ctor_get(a, i) << std::endl;
|
||
missing_child = true;
|
||
lean_assert(m_todo.back() == lean_ctor_get(a, i));
|
||
}
|
||
}
|
||
if (missing_child)
|
||
return;
|
||
unsigned tag = lean_ptr_tag(a);
|
||
unsigned sz = lean_object_byte_size(a);
|
||
unsigned scalar_offset = sizeof(lean_object) + num_objs*sizeof(void*);
|
||
unsigned scalar_sz = sz - scalar_offset;
|
||
lean_object * new_a = lean_alloc_ctor(tag, num_objs, scalar_sz);
|
||
for (unsigned i = 0; i < num_objs; i++) {
|
||
lean_inc(m_children[i]);
|
||
lean_ctor_set(new_a, i, m_children[i]);
|
||
}
|
||
if (scalar_sz > 0) {
|
||
memcpy(reinterpret_cast<char*>(new_a) + scalar_offset, reinterpret_cast<char*>(a) + scalar_offset, scalar_sz);
|
||
}
|
||
save(a, new_a);
|
||
}
|
||
|
||
public:
|
||
sharecommon_fn(b_obj_arg tc, obj_arg s):m_state(tc, s) {}
|
||
|
||
obj_res operator()(obj_arg a) {
|
||
if (push_child(a)) {
|
||
return m_state.pack(a);
|
||
}
|
||
while (!m_todo.empty()) {
|
||
b_obj_arg curr = m_todo.back();
|
||
// std::cout << "visiting " << curr << " " << static_cast<unsigned>(lean_ptr_tag(curr)) << "\n";
|
||
switch (lean_ptr_tag(curr)) {
|
||
case LeanClosure: lean_unreachable();
|
||
case LeanArray: visit_array(curr); break;
|
||
case LeanScalarArray: visit_sarray(curr); break;
|
||
case LeanString: visit_string(curr); break;
|
||
case LeanMPZ: lean_unreachable();
|
||
case LeanThunk: lean_unreachable();
|
||
case LeanTask: lean_unreachable();
|
||
case LeanRef: lean_unreachable();
|
||
case LeanExternal: lean_unreachable();
|
||
case LeanReserved: lean_unreachable();
|
||
default: visit_ctor(curr); break;
|
||
}
|
||
}
|
||
|
||
obj_res o = m_state.map_find(a);
|
||
lean_assert(o != lean_box(0));
|
||
obj_res r = lean_ctor_get(o, 0);
|
||
lean_inc(r);
|
||
lean_dec(o);
|
||
lean_dec(a);
|
||
return m_state.pack(r);
|
||
}
|
||
};
|
||
|
||
// def State.shareCommon {α} {σ : @& StateFactory} (s : State σ) (a : α) : α × State σ
|
||
extern "C" LEAN_EXPORT obj_res lean_state_sharecommon(b_obj_arg tc, obj_arg s, obj_arg a) {
|
||
return sharecommon_fn(tc, s)(a);
|
||
}
|
||
|
||
/*
|
||
A faster version of `sharecommon_fn` which only uses a local state.
|
||
It optimizes the number of RC operations, the strategy for caching results,
|
||
and uses C++ hashmap.
|
||
*/
|
||
class sharecommon_quick_fn {
|
||
struct set_hash {
|
||
std::size_t operator()(lean_object * o) const { return lean_sharecommon_hash(o); }
|
||
};
|
||
struct set_eq {
|
||
std::size_t operator()(lean_object * o1, lean_object * o2) const { return lean_sharecommon_eq(o1, o2); }
|
||
};
|
||
|
||
/*
|
||
We use `m_cache` to ensure we do **not** traverse a DAG as a tree.
|
||
We use pointer equality for this collection.
|
||
*/
|
||
std::unordered_map<lean_object *, lean_object *> m_cache;
|
||
/* Set of maximally shared terms. AKA hash-consing table. */
|
||
std::unordered_set<lean_object *, set_hash, set_eq> m_set;
|
||
|
||
/*
|
||
We do not increment reference counters when inserting Lean objects at `m_cache` and `m_set`.
|
||
This is correct because
|
||
- The domain of `m_cache` contains only sub-objects of `lean_sharecommon_quick` parameter,
|
||
and we know the object referenced by this parameter will remain alive.
|
||
- The range of `m_cache` contains only new objects that have been maxed shared, and these
|
||
objects will be are sub-objects of the object returned by `lean_sharecommon_quick`.
|
||
- `m_set` is like the range of `m_cache`.
|
||
*/
|
||
|
||
lean_object * check_cache(lean_object * a) {
|
||
if (!lean_is_exclusive(a)) {
|
||
// We only check the cache if `a` is a shared object
|
||
auto it = m_cache.find(a);
|
||
if (it != m_cache.end()) {
|
||
// All objects stored in the range of `m_cache` are single threaded.
|
||
lean_assert(lean_is_st(it->second));
|
||
// We increment the reference counter because this object
|
||
// will be returned by `lean_sharecommon_quick` or stored into a new object.
|
||
it->second->m_rc++;
|
||
return it->second;
|
||
}
|
||
}
|
||
return nullptr;
|
||
}
|
||
|
||
/*
|
||
`new_a` is a new object that is equal to `a`, but its subobjects are maximally shared.
|
||
*/
|
||
lean_object * save(lean_object * a, lean_object * new_a) {
|
||
lean_assert(lean_is_st(new_a));
|
||
lean_assert(new_a->m_rc == 1);
|
||
auto it = m_set.find(new_a);
|
||
lean_object * result;
|
||
if (it == m_set.end()) {
|
||
// `new_a` is a new object
|
||
m_set.insert(new_a);
|
||
result = new_a;
|
||
} else {
|
||
// We already have a maximally shared object that is equal to `new_a`
|
||
result = *it;
|
||
DEBUG_CODE({
|
||
if (lean_is_ctor(new_a)) {
|
||
lean_assert(lean_is_ctor(result));
|
||
unsigned num_objs = lean_ctor_num_objs(new_a);
|
||
lean_assert(lean_ctor_num_objs(result) == num_objs);
|
||
for (unsigned i = 0; i < num_objs; i++) {
|
||
lean_assert(lean_ctor_get(result, i) == lean_ctor_get(new_a, i));
|
||
}
|
||
}
|
||
});
|
||
lean_dec_ref(new_a); // delete `new_a`
|
||
// All objects in `m_set` are single threaded.
|
||
lean_assert(lean_is_st(result));
|
||
result->m_rc++;
|
||
lean_assert(result->m_rc > 1);
|
||
}
|
||
if (!lean_is_exclusive(a)) {
|
||
// We only cache the result if `a` is a shared object.
|
||
m_cache.insert(std::make_pair(a, result));
|
||
}
|
||
lean_assert(result == new_a || result->m_rc > 1);
|
||
lean_assert(result != new_a || result->m_rc == 1);
|
||
return result;
|
||
}
|
||
|
||
lean_object * visit_sarray(lean_object * a) {
|
||
lean_object * r = check_cache(a);
|
||
if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
|
||
|
||
size_t sz = lean_sarray_size(a);
|
||
unsigned elem_sz = lean_sarray_elem_size(a);
|
||
lean_sarray_object * new_a = (lean_sarray_object*)lean_alloc_sarray(elem_sz, sz, sz);
|
||
memcpy(new_a->m_data, lean_to_sarray(a)->m_data, elem_sz*sz);
|
||
return save(a, (lean_object*)new_a);
|
||
}
|
||
|
||
lean_object * visit_string(lean_object * a) {
|
||
lean_object * r = check_cache(a);
|
||
if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
|
||
|
||
size_t sz = lean_string_size(a);
|
||
size_t len = lean_string_len(a);
|
||
lean_string_object * new_a = (lean_string_object*)lean_alloc_string(sz, sz, len);
|
||
lean_set_st_header((lean_object*)new_a, LeanString, 0);
|
||
new_a->m_size = sz;
|
||
new_a->m_capacity = sz;
|
||
new_a->m_length = len;
|
||
memcpy(new_a->m_data, lean_to_string(a)->m_data, sz);
|
||
return save(a, (lean_object*)new_a);
|
||
}
|
||
|
||
lean_object * visit_array(lean_object * a) {
|
||
lean_object * r = check_cache(a);
|
||
if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
|
||
|
||
size_t sz = array_size(a);
|
||
lean_array_object * new_a = (lean_array_object*)lean_alloc_array(sz, sz);
|
||
for (size_t i = 0; i < sz; i++) {
|
||
lean_array_set_core((lean_object*)new_a, i, visit(lean_array_get_core(a, i)));
|
||
}
|
||
return save(a, (lean_object*)new_a);
|
||
}
|
||
|
||
lean_object * visit_ctor(lean_object * a) {
|
||
lean_object * r = check_cache(a);
|
||
if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
|
||
|
||
unsigned num_objs = lean_ctor_num_objs(a);
|
||
unsigned tag = lean_ptr_tag(a);
|
||
unsigned sz = lean_object_byte_size(a);
|
||
unsigned scalar_offset = sizeof(lean_object) + num_objs*sizeof(void*);
|
||
unsigned scalar_sz = sz - scalar_offset;
|
||
lean_object * new_a = lean_alloc_ctor(tag, num_objs, scalar_sz);
|
||
for (unsigned i = 0; i < num_objs; i++) {
|
||
lean_ctor_set(new_a, i, visit(lean_ctor_get(a, i)));
|
||
}
|
||
if (scalar_sz > 0) {
|
||
memcpy(reinterpret_cast<char*>(new_a) + scalar_offset, reinterpret_cast<char*>(a) + scalar_offset, scalar_sz);
|
||
}
|
||
return save(a, new_a);
|
||
}
|
||
|
||
public:
|
||
|
||
/*
|
||
**TODO:** We did not implement stack overflow detection.
|
||
We claim it is not needed in the current uses of `shareCommon'`.
|
||
If this becomes an issue, we can use the following approach to address the issue without
|
||
affecting the performance.
|
||
- Add an extra `depth` parameter.
|
||
- In `operator()`, estimate the maximum depth based on the remaining stack space. See `check_stack`.
|
||
- If the limit is reached, simply return `a`.
|
||
*/
|
||
lean_object * visit(lean_object * a) {
|
||
if (lean_is_scalar(a)) {
|
||
return a;
|
||
}
|
||
switch (lean_ptr_tag(a)) {
|
||
/*
|
||
Similarly to `sharecommon_fn`, we only maximally share arrays, scalar arrays, strings, and
|
||
constructor objects.
|
||
*/
|
||
case LeanMPZ: lean_inc_ref(a); return a;
|
||
case LeanClosure: lean_inc_ref(a); return a;
|
||
case LeanThunk: lean_inc_ref(a); return a;
|
||
case LeanTask: lean_inc_ref(a); return a;
|
||
case LeanRef: lean_inc_ref(a); return a;
|
||
case LeanExternal: lean_inc_ref(a); return a;
|
||
case LeanReserved: lean_inc_ref(a); return a;
|
||
case LeanArray: return visit_array(a);
|
||
case LeanScalarArray: return visit_sarray(a);
|
||
case LeanString: return visit_string(a);
|
||
default: return visit_ctor(a);
|
||
}
|
||
}
|
||
|
||
lean_object * operator()(lean_object * a) {
|
||
return visit(a);
|
||
}
|
||
};
|
||
|
||
// def ShareCommon.shareCommon' (a : A) : A := a
|
||
extern "C" LEAN_EXPORT obj_res lean_sharecommon_quick(obj_arg a) {
|
||
return sharecommon_quick_fn()(a);
|
||
}
|
||
};
|