From db594425bfdcf545bd99fc0d30088c61959c4e21 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Wed, 31 Jul 2024 21:13:12 +0200 Subject: [PATCH] refactor: `sharecommon` (#4887) This PR also fixes a missing borrow annotation. --- src/Init/ShareCommon.lean | 2 +- src/runtime/sharecommon.cpp | 281 ++++++++++++++++-------------------- src/runtime/sharecommon.h | 46 ++++++ 3 files changed, 174 insertions(+), 155 deletions(-) create mode 100644 src/runtime/sharecommon.h diff --git a/src/Init/ShareCommon.lean b/src/Init/ShareCommon.lean index d3990b6336..26822e791d 100644 --- a/src/Init/ShareCommon.lean +++ b/src/Init/ShareCommon.lean @@ -109,4 +109,4 @@ A more restrictive but efficient max sharing primitive. Remark: it optimizes the number of RC operations, and the strategy for caching results. -/ @[extern "lean_sharecommon_quick"] -def ShareCommon.shareCommon' (a : α) : α := a +def ShareCommon.shareCommon' (a : @& α) : α := a diff --git a/src/runtime/sharecommon.cpp b/src/runtime/sharecommon.cpp index 413c1d49b6..47a3b80f41 100644 --- a/src/runtime/sharecommon.cpp +++ b/src/runtime/sharecommon.cpp @@ -8,7 +8,7 @@ Author: Leonardo de Moura #include #include #include -#include "runtime/object.h" +#include "runtime/sharecommon.h" #include "runtime/hash.h" namespace lean { @@ -271,173 +271,146 @@ extern "C" LEAN_EXPORT obj_res lean_state_sharecommon(b_obj_arg tc, obj_arg s, o return sharecommon_fn(tc, s)(a); } + /* -A faster version of `sharecommon_fn` which only uses a local state. -It optimizes the number of RC operations, the strategy for caching results, -and uses C++ hashmap. + We do not increment reference counters when inserting Lean objects at `m_cache` and `m_set`. + This is correct because + - The domain of `m_cache` contains only sub-objects of `lean_sharecommon_quick` parameter, + and we know the object referenced by this parameter will remain alive. + - The range of `m_cache` contains only new objects that have been maxed shared, and these + objects will be are sub-objects of the object returned by `lean_sharecommon_quick`. + - `m_set` is like the range of `m_cache`. */ -class sharecommon_quick_fn { - struct set_hash { - std::size_t operator()(lean_object * o) const { return lean_sharecommon_hash(o); } - }; - struct set_eq { - std::size_t operator()(lean_object * o1, lean_object * o2) const { return lean_sharecommon_eq(o1, o2); } - }; - /* - We use `m_cache` to ensure we do **not** traverse a DAG as a tree. - We use pointer equality for this collection. - */ - std::unordered_map m_cache; - /* Set of maximally shared terms. AKA hash-consing table. */ - std::unordered_set m_set; - - /* - We do not increment reference counters when inserting Lean objects at `m_cache` and `m_set`. - This is correct because - - The domain of `m_cache` contains only sub-objects of `lean_sharecommon_quick` parameter, - and we know the object referenced by this parameter will remain alive. - - The range of `m_cache` contains only new objects that have been maxed shared, and these - objects will be are sub-objects of the object returned by `lean_sharecommon_quick`. - - `m_set` is like the range of `m_cache`. - */ - - lean_object * check_cache(lean_object * a) { - if (!lean_is_exclusive(a)) { - // We only check the cache if `a` is a shared object - auto it = m_cache.find(a); - if (it != m_cache.end()) { - // All objects stored in the range of `m_cache` are single threaded. - lean_assert(lean_is_st(it->second)); - // We increment the reference counter because this object - // will be returned by `lean_sharecommon_quick` or stored into a new object. - it->second->m_rc++; - return it->second; - } +lean_object * sharecommon_quick_fn::check_cache(lean_object * a) { + if (!lean_is_exclusive(a)) { + // We only check the cache if `a` is a shared object + auto it = m_cache.find(a); + if (it != m_cache.end()) { + // All objects stored in the range of `m_cache` are single threaded. + lean_assert(lean_is_st(it->second)); + // We increment the reference counter because this object + // will be returned by `lean_sharecommon_quick` or stored into a new object. + it->second->m_rc++; + return it->second; } - return nullptr; } + return nullptr; +} - /* - `new_a` is a new object that is equal to `a`, but its subobjects are maximally shared. - */ - lean_object * save(lean_object * a, lean_object * new_a) { - lean_assert(lean_is_st(new_a)); - lean_assert(new_a->m_rc == 1); - auto it = m_set.find(new_a); - lean_object * result; - if (it == m_set.end()) { - // `new_a` is a new object - m_set.insert(new_a); - result = new_a; - } else { - // We already have a maximally shared object that is equal to `new_a` - result = *it; - DEBUG_CODE({ - if (lean_is_ctor(new_a)) { - lean_assert(lean_is_ctor(result)); - unsigned num_objs = lean_ctor_num_objs(new_a); - lean_assert(lean_ctor_num_objs(result) == num_objs); - for (unsigned i = 0; i < num_objs; i++) { - lean_assert(lean_ctor_get(result, i) == lean_ctor_get(new_a, i)); - } +/* + `new_a` is a new object that is equal to `a`, but its subobjects are maximally shared. +*/ +lean_object * sharecommon_quick_fn::save(lean_object * a, lean_object * new_a) { + lean_assert(lean_is_st(new_a)); + lean_assert(new_a->m_rc == 1); + auto it = m_set.find(new_a); + lean_object * result; + if (it == m_set.end()) { + // `new_a` is a new object + m_set.insert(new_a); + result = new_a; + } else { + // We already have a maximally shared object that is equal to `new_a` + result = *it; + DEBUG_CODE({ + if (lean_is_ctor(new_a)) { + lean_assert(lean_is_ctor(result)); + unsigned num_objs = lean_ctor_num_objs(new_a); + lean_assert(lean_ctor_num_objs(result) == num_objs); + for (unsigned i = 0; i < num_objs; i++) { + lean_assert(lean_ctor_get(result, i) == lean_ctor_get(new_a, i)); } - }); - lean_dec_ref(new_a); // delete `new_a` - // All objects in `m_set` are single threaded. - lean_assert(lean_is_st(result)); - result->m_rc++; - lean_assert(result->m_rc > 1); - } - if (!lean_is_exclusive(a)) { - // We only cache the result if `a` is a shared object. - m_cache.insert(std::make_pair(a, result)); - } - lean_assert(result == new_a || result->m_rc > 1); - lean_assert(result != new_a || result->m_rc == 1); - return result; + } + }); + lean_dec_ref(new_a); // delete `new_a` + // All objects in `m_set` are single threaded. + lean_assert(lean_is_st(result)); + result->m_rc++; + lean_assert(result->m_rc > 1); } + if (!lean_is_exclusive(a)) { + // We only cache the result if `a` is a shared object. + m_cache.insert(std::make_pair(a, result)); + } + lean_assert(result == new_a || result->m_rc > 1); + lean_assert(result != new_a || result->m_rc == 1); + return result; +} - // `sarray` and `string` - lean_object * visit_terminal(lean_object * a) { - auto it = m_set.find(a); - if (it == m_set.end()) { - m_set.insert(a); - } else { - a = *it; - } - lean_inc_ref(a); +// `sarray` and `string` +lean_object * sharecommon_quick_fn::visit_terminal(lean_object * a) { + auto it = m_set.find(a); + if (it == m_set.end()) { + m_set.insert(a); + } else { + a = *it; + } + lean_inc_ref(a); + return a; +} + +lean_object * sharecommon_quick_fn::visit_array(lean_object * a) { + lean_object * r = check_cache(a); + if (r != nullptr) { lean_assert(r->m_rc > 1); return r; } + + size_t sz = array_size(a); + lean_array_object * new_a = (lean_array_object*)lean_alloc_array(sz, sz); + for (size_t i = 0; i < sz; i++) { + lean_array_set_core((lean_object*)new_a, i, visit(lean_array_get_core(a, i))); + } + return save(a, (lean_object*)new_a); +} + +lean_object * sharecommon_quick_fn::visit_ctor(lean_object * a) { + lean_object * r = check_cache(a); + if (r != nullptr) { lean_assert(r->m_rc > 1); return r; } + unsigned num_objs = lean_ctor_num_objs(a); + unsigned tag = lean_ptr_tag(a); + unsigned sz = lean_object_byte_size(a); + unsigned scalar_offset = sizeof(lean_object) + num_objs*sizeof(void*); + unsigned scalar_sz = sz - scalar_offset; + lean_object * new_a = lean_alloc_ctor(tag, num_objs, scalar_sz); + for (unsigned i = 0; i < num_objs; i++) { + lean_ctor_set(new_a, i, visit(lean_ctor_get(a, i))); + } + if (scalar_sz > 0) { + memcpy(reinterpret_cast(new_a) + scalar_offset, reinterpret_cast(a) + scalar_offset, scalar_sz); + } + return save(a, new_a); +} + +/* +**TODO:** We did not implement stack overflow detection. +We claim it is not needed in the current uses of `shareCommon'`. +If this becomes an issue, we can use the following approach to address the issue without +affecting the performance. +- Add an extra `depth` parameter. +- In `operator()`, estimate the maximum depth based on the remaining stack space. See `check_stack`. +- If the limit is reached, simply return `a`. +*/ +lean_object * sharecommon_quick_fn::visit(lean_object * a) { + if (lean_is_scalar(a)) { return a; } - - lean_object * visit_array(lean_object * a) { - lean_object * r = check_cache(a); - if (r != nullptr) { lean_assert(r->m_rc > 1); return r; } - - size_t sz = array_size(a); - lean_array_object * new_a = (lean_array_object*)lean_alloc_array(sz, sz); - for (size_t i = 0; i < sz; i++) { - lean_array_set_core((lean_object*)new_a, i, visit(lean_array_get_core(a, i))); - } - return save(a, (lean_object*)new_a); - } - - lean_object * visit_ctor(lean_object * a) { - lean_object * r = check_cache(a); - if (r != nullptr) { lean_assert(r->m_rc > 1); return r; } - unsigned num_objs = lean_ctor_num_objs(a); - unsigned tag = lean_ptr_tag(a); - unsigned sz = lean_object_byte_size(a); - unsigned scalar_offset = sizeof(lean_object) + num_objs*sizeof(void*); - unsigned scalar_sz = sz - scalar_offset; - lean_object * new_a = lean_alloc_ctor(tag, num_objs, scalar_sz); - for (unsigned i = 0; i < num_objs; i++) { - lean_ctor_set(new_a, i, visit(lean_ctor_get(a, i))); - } - if (scalar_sz > 0) { - memcpy(reinterpret_cast(new_a) + scalar_offset, reinterpret_cast(a) + scalar_offset, scalar_sz); - } - return save(a, new_a); - } - -public: - + switch (lean_ptr_tag(a)) { /* - **TODO:** We did not implement stack overflow detection. - We claim it is not needed in the current uses of `shareCommon'`. - If this becomes an issue, we can use the following approach to address the issue without - affecting the performance. - - Add an extra `depth` parameter. - - In `operator()`, estimate the maximum depth based on the remaining stack space. See `check_stack`. - - If the limit is reached, simply return `a`. + Similarly to `sharecommon_fn`, we only maximally share arrays, scalar arrays, strings, and + constructor objects. */ - lean_object * visit(lean_object * a) { - if (lean_is_scalar(a)) { - return a; - } - switch (lean_ptr_tag(a)) { - /* - Similarly to `sharecommon_fn`, we only maximally share arrays, scalar arrays, strings, and - constructor objects. - */ - case LeanMPZ: lean_inc_ref(a); return a; - case LeanClosure: lean_inc_ref(a); return a; - case LeanThunk: lean_inc_ref(a); return a; - case LeanTask: lean_inc_ref(a); return a; - case LeanRef: lean_inc_ref(a); return a; - case LeanExternal: lean_inc_ref(a); return a; - case LeanReserved: lean_inc_ref(a); return a; - case LeanScalarArray: return visit_terminal(a); - case LeanString: return visit_terminal(a); - case LeanArray: return visit_array(a); - default: return visit_ctor(a); - } + case LeanMPZ: lean_inc_ref(a); return a; + case LeanClosure: lean_inc_ref(a); return a; + case LeanThunk: lean_inc_ref(a); return a; + case LeanTask: lean_inc_ref(a); return a; + case LeanRef: lean_inc_ref(a); return a; + case LeanExternal: lean_inc_ref(a); return a; + case LeanReserved: lean_inc_ref(a); return a; + case LeanScalarArray: return visit_terminal(a); + case LeanString: return visit_terminal(a); + case LeanArray: return visit_array(a); + default: return visit_ctor(a); } - - lean_object * operator()(lean_object * a) { - return visit(a); - } -}; +} // def ShareCommon.shareCommon' (a : A) : A := a extern "C" LEAN_EXPORT obj_res lean_sharecommon_quick(obj_arg a) { diff --git a/src/runtime/sharecommon.h b/src/runtime/sharecommon.h new file mode 100644 index 0000000000..1ac304182e --- /dev/null +++ b/src/runtime/sharecommon.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura +*/ +#pragma once +#include "runtime/object.h" + +namespace lean { +extern "C" LEAN_EXPORT uint8 lean_sharecommon_eq(b_obj_arg o1, b_obj_arg o2); +extern "C" LEAN_EXPORT uint64_t lean_sharecommon_hash(b_obj_arg o); + +/* +A faster version of `sharecommon_fn` which only uses a local state. +It optimizes the number of RC operations, the strategy for caching results, +and uses C++ hashmap. +*/ +class sharecommon_quick_fn { + struct set_hash { + std::size_t operator()(lean_object * o) const { return lean_sharecommon_hash(o); } + }; + struct set_eq { + std::size_t operator()(lean_object * o1, lean_object * o2) const { return lean_sharecommon_eq(o1, o2); } + }; + + /* + We use `m_cache` to ensure we do **not** traverse a DAG as a tree. + We use pointer equality for this collection. + */ + std::unordered_map m_cache; + /* Set of maximally shared terms. AKA hash-consing table. */ + std::unordered_set m_set; + + lean_object * check_cache(lean_object * a); + lean_object * save(lean_object * a, lean_object * new_a); + lean_object * visit_terminal(lean_object * a); + lean_object * visit_array(lean_object * a); + lean_object * visit_ctor(lean_object * a); + lean_object * visit(lean_object * a); +public: + lean_object * operator()(lean_object * a) { + return visit(a); + } +}; +};