From db594425bfdcf545bd99fc0d30088c61959c4e21 Mon Sep 17 00:00:00 2001
From: Leonardo de Moura <leomoura@amazon.com>
Date: Wed, 31 Jul 2024 21:13:12 +0200
Subject: [PATCH] refactor: `sharecommon` (#4887)

This PR also fixes a missing borrow annotation.
---
 src/Init/ShareCommon.lean   |   2 +-
 src/runtime/sharecommon.cpp | 281 ++++++++++++++++--------------------
 src/runtime/sharecommon.h   |  46 ++++++
 3 files changed, 174 insertions(+), 155 deletions(-)
 create mode 100644 src/runtime/sharecommon.h
diff --git a/src/Init/ShareCommon.lean b/src/Init/ShareCommon.lean
index d3990b6336..26822e791d 100644
--- a/src/Init/ShareCommon.lean
+++ b/src/Init/ShareCommon.lean
@@ -109,4 +109,4 @@ A more restrictive but efficient max sharing primitive.
 Remark: it optimizes the number of RC operations, and the strategy for caching results.
 -/
 @[extern "lean_sharecommon_quick"]
-def ShareCommon.shareCommon' (a : α) : α := a
+def ShareCommon.shareCommon' (a : @& α) : α := a
diff --git a/src/runtime/sharecommon.cpp b/src/runtime/sharecommon.cpp
index 413c1d49b6..47a3b80f41 100644
--- a/src/runtime/sharecommon.cpp
+++ b/src/runtime/sharecommon.cpp
@@ -8,7 +8,7 @@ Author: Leonardo de Moura
 #include <cstring>
 #include <unordered_map>
 #include <unordered_set>
-#include "runtime/object.h"
+#include "runtime/sharecommon.h"
 #include "runtime/hash.h"
 
 namespace lean {
@@ -271,173 +271,146 @@ extern "C" LEAN_EXPORT obj_res lean_state_sharecommon(b_obj_arg tc, obj_arg s, o
     return sharecommon_fn(tc, s)(a);
 }
 
+
 /*
-A faster version of `sharecommon_fn` which only uses a local state.
-It optimizes the number of RC operations, the strategy for caching results,
-and uses C++ hashmap.
+  We do not increment reference counters when inserting Lean objects at `m_cache` and `m_set`.
+  This is correct because
+  - The domain of `m_cache` contains only sub-objects of `lean_sharecommon_quick` parameter,
+    and we know the object referenced by this parameter will remain alive.
+  - The range of `m_cache` contains only new objects that have been maxed shared, and these
+    objects will be are sub-objects of the object returned by `lean_sharecommon_quick`.
+  - `m_set` is like the range of `m_cache`.
 */
-class sharecommon_quick_fn {
-    struct set_hash {
-        std::size_t operator()(lean_object * o) const { return lean_sharecommon_hash(o); }
-    };
-    struct set_eq {
-        std::size_t operator()(lean_object * o1, lean_object * o2) const { return lean_sharecommon_eq(o1, o2); }
-    };
 
-    /*
-    We use `m_cache` to ensure we do **not** traverse a DAG as a tree.
-    We use pointer equality for this collection.
-    */
-    std::unordered_map<lean_object *, lean_object *> m_cache;
-    /* Set of maximally shared terms. AKA hash-consing table. */
-    std::unordered_set<lean_object *, set_hash, set_eq> m_set;
-
-    /*
-      We do not increment reference counters when inserting Lean objects at `m_cache` and `m_set`.
-      This is correct because
-      - The domain of `m_cache` contains only sub-objects of `lean_sharecommon_quick` parameter,
-        and we know the object referenced by this parameter will remain alive.
-      - The range of `m_cache` contains only new objects that have been maxed shared, and these
-        objects will be are sub-objects of the object returned by `lean_sharecommon_quick`.
-      - `m_set` is like the range of `m_cache`.
-    */
-
-    lean_object * check_cache(lean_object * a) {
-        if (!lean_is_exclusive(a)) {
-            // We only check the cache if `a` is a shared object
-            auto it = m_cache.find(a);
-            if (it != m_cache.end()) {
-                // All objects stored in the range of `m_cache` are single threaded.
-                lean_assert(lean_is_st(it->second));
-                // We increment the reference counter because this object
-                // will be returned by `lean_sharecommon_quick` or stored into a new object.
-                it->second->m_rc++;
-                return it->second;
-            }
+lean_object * sharecommon_quick_fn::check_cache(lean_object * a) {
+    if (!lean_is_exclusive(a)) {
+        // We only check the cache if `a` is a shared object
+        auto it = m_cache.find(a);
+        if (it != m_cache.end()) {
+            // All objects stored in the range of `m_cache` are single threaded.
+            lean_assert(lean_is_st(it->second));
+            // We increment the reference counter because this object
+            // will be returned by `lean_sharecommon_quick` or stored into a new object.
+            it->second->m_rc++;
+            return it->second;
         }
-        return nullptr;
     }
+    return nullptr;
+}
 
-    /*
-    `new_a` is a new object that is equal to `a`, but its subobjects are maximally shared.
-    */
-    lean_object * save(lean_object * a, lean_object * new_a) {
-        lean_assert(lean_is_st(new_a));
-        lean_assert(new_a->m_rc == 1);
-        auto it = m_set.find(new_a);
-        lean_object * result;
-        if (it == m_set.end()) {
-            // `new_a` is a new object
-            m_set.insert(new_a);
-            result = new_a;
-        } else {
-            // We already have a maximally shared object that is equal to `new_a`
-            result = *it;
-            DEBUG_CODE({
-                    if (lean_is_ctor(new_a)) {
-                        lean_assert(lean_is_ctor(result));
-                        unsigned num_objs = lean_ctor_num_objs(new_a);
-                        lean_assert(lean_ctor_num_objs(result) == num_objs);
-                        for (unsigned i = 0; i < num_objs; i++) {
-                            lean_assert(lean_ctor_get(result, i) == lean_ctor_get(new_a, i));
-                        }
+/*
+  `new_a` is a new object that is equal to `a`, but its subobjects are maximally shared.
+*/
+lean_object * sharecommon_quick_fn::save(lean_object * a, lean_object * new_a) {
+    lean_assert(lean_is_st(new_a));
+    lean_assert(new_a->m_rc == 1);
+    auto it = m_set.find(new_a);
+    lean_object * result;
+    if (it == m_set.end()) {
+        // `new_a` is a new object
+        m_set.insert(new_a);
+        result = new_a;
+    } else {
+        // We already have a maximally shared object that is equal to `new_a`
+        result = *it;
+        DEBUG_CODE({
+                if (lean_is_ctor(new_a)) {
+                    lean_assert(lean_is_ctor(result));
+                    unsigned num_objs = lean_ctor_num_objs(new_a);
+                    lean_assert(lean_ctor_num_objs(result) == num_objs);
+                    for (unsigned i = 0; i < num_objs; i++) {
+                        lean_assert(lean_ctor_get(result, i) == lean_ctor_get(new_a, i));
                     }
-                });
-            lean_dec_ref(new_a); // delete `new_a`
-            // All objects in `m_set` are single threaded.
-            lean_assert(lean_is_st(result));
-            result->m_rc++;
-            lean_assert(result->m_rc > 1);
-        }
-        if (!lean_is_exclusive(a)) {
-            // We only cache the result if `a` is a shared object.
-            m_cache.insert(std::make_pair(a, result));
-        }
-        lean_assert(result == new_a || result->m_rc > 1);
-        lean_assert(result != new_a || result->m_rc == 1);
-        return result;
+                }
+            });
+        lean_dec_ref(new_a); // delete `new_a`
+        // All objects in `m_set` are single threaded.
+        lean_assert(lean_is_st(result));
+        result->m_rc++;
+        lean_assert(result->m_rc > 1);
     }
+    if (!lean_is_exclusive(a)) {
+        // We only cache the result if `a` is a shared object.
+        m_cache.insert(std::make_pair(a, result));
+    }
+    lean_assert(result == new_a || result->m_rc > 1);
+    lean_assert(result != new_a || result->m_rc == 1);
+    return result;
+}
 
-    // `sarray` and `string`
-    lean_object * visit_terminal(lean_object * a) {
-        auto it = m_set.find(a);
-        if (it == m_set.end()) {
-            m_set.insert(a);
-        } else {
-            a = *it;
-        }
-        lean_inc_ref(a);
+// `sarray` and `string`
+lean_object * sharecommon_quick_fn::visit_terminal(lean_object * a) {
+    auto it = m_set.find(a);
+    if (it == m_set.end()) {
+        m_set.insert(a);
+    } else {
+        a = *it;
+    }
+    lean_inc_ref(a);
+    return a;
+}
+
+lean_object * sharecommon_quick_fn::visit_array(lean_object * a) {
+    lean_object * r = check_cache(a);
+    if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
+
+    size_t sz = array_size(a);
+    lean_array_object * new_a = (lean_array_object*)lean_alloc_array(sz, sz);
+    for (size_t i = 0; i < sz; i++) {
+        lean_array_set_core((lean_object*)new_a, i, visit(lean_array_get_core(a, i)));
+    }
+    return save(a, (lean_object*)new_a);
+}
+
+lean_object * sharecommon_quick_fn::visit_ctor(lean_object * a) {
+    lean_object * r = check_cache(a);
+    if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
+    unsigned num_objs      = lean_ctor_num_objs(a);
+    unsigned tag           = lean_ptr_tag(a);
+    unsigned sz            = lean_object_byte_size(a);
+    unsigned scalar_offset = sizeof(lean_object) + num_objs*sizeof(void*);
+    unsigned scalar_sz     = sz - scalar_offset;
+    lean_object * new_a    = lean_alloc_ctor(tag, num_objs, scalar_sz);
+    for (unsigned i = 0; i < num_objs; i++) {
+        lean_ctor_set(new_a, i, visit(lean_ctor_get(a, i)));
+    }
+    if (scalar_sz > 0) {
+        memcpy(reinterpret_cast<char*>(new_a) + scalar_offset, reinterpret_cast<char*>(a) + scalar_offset, scalar_sz);
+    }
+    return save(a, new_a);
+}
+
+/*
+**TODO:** We did not implement stack overflow detection.
+We claim it is not needed in the current uses of `shareCommon'`.
+If this becomes an issue, we can use the following approach to address the issue without
+affecting the performance.
+- Add an extra `depth` parameter.
+- In `operator()`, estimate the maximum depth based on the remaining stack space. See `check_stack`.
+- If the limit is reached, simply return `a`.
+*/
+lean_object * sharecommon_quick_fn::visit(lean_object * a) {
+    if (lean_is_scalar(a)) {
         return a;
     }
-
-    lean_object * visit_array(lean_object * a) {
-        lean_object * r = check_cache(a);
-        if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
-
-        size_t sz = array_size(a);
-        lean_array_object * new_a = (lean_array_object*)lean_alloc_array(sz, sz);
-        for (size_t i = 0; i < sz; i++) {
-            lean_array_set_core((lean_object*)new_a, i, visit(lean_array_get_core(a, i)));
-        }
-        return save(a, (lean_object*)new_a);
-    }
-
-    lean_object * visit_ctor(lean_object * a) {
-        lean_object * r = check_cache(a);
-        if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
-        unsigned num_objs      = lean_ctor_num_objs(a);
-        unsigned tag           = lean_ptr_tag(a);
-        unsigned sz            = lean_object_byte_size(a);
-        unsigned scalar_offset = sizeof(lean_object) + num_objs*sizeof(void*);
-        unsigned scalar_sz     = sz - scalar_offset;
-        lean_object * new_a    = lean_alloc_ctor(tag, num_objs, scalar_sz);
-        for (unsigned i = 0; i < num_objs; i++) {
-            lean_ctor_set(new_a, i, visit(lean_ctor_get(a, i)));
-        }
-        if (scalar_sz > 0) {
-            memcpy(reinterpret_cast<char*>(new_a) + scalar_offset, reinterpret_cast<char*>(a) + scalar_offset, scalar_sz);
-        }
-        return save(a, new_a);
-    }
-
-public:
-
+    switch (lean_ptr_tag(a)) {
     /*
-    **TODO:** We did not implement stack overflow detection.
-    We claim it is not needed in the current uses of `shareCommon'`.
-    If this becomes an issue, we can use the following approach to address the issue without
-    affecting the performance.
-    - Add an extra `depth` parameter.
-    - In `operator()`, estimate the maximum depth based on the remaining stack space. See `check_stack`.
-    - If the limit is reached, simply return `a`.
+      Similarly to `sharecommon_fn`, we only maximally share arrays, scalar arrays, strings, and
+      constructor objects.
     */
-    lean_object * visit(lean_object * a) {
-        if (lean_is_scalar(a)) {
-            return a;
-        }
-        switch (lean_ptr_tag(a)) {
-        /*
-        Similarly to `sharecommon_fn`, we only maximally share arrays, scalar arrays, strings, and
-        constructor objects.
-        */
-        case LeanMPZ:             lean_inc_ref(a); return a;
-        case LeanClosure:         lean_inc_ref(a); return a;
-        case LeanThunk:           lean_inc_ref(a); return a;
-        case LeanTask:            lean_inc_ref(a); return a;
-        case LeanRef:             lean_inc_ref(a); return a;
-        case LeanExternal:        lean_inc_ref(a); return a;
-        case LeanReserved:        lean_inc_ref(a); return a;
-        case LeanScalarArray:     return visit_terminal(a);
-        case LeanString:          return visit_terminal(a);
-        case LeanArray:           return visit_array(a);
-        default:                  return visit_ctor(a);
-        }
+    case LeanMPZ:             lean_inc_ref(a); return a;
+    case LeanClosure:         lean_inc_ref(a); return a;
+    case LeanThunk:           lean_inc_ref(a); return a;
+    case LeanTask:            lean_inc_ref(a); return a;
+    case LeanRef:             lean_inc_ref(a); return a;
+    case LeanExternal:        lean_inc_ref(a); return a;
+    case LeanReserved:        lean_inc_ref(a); return a;
+    case LeanScalarArray:     return visit_terminal(a);
+    case LeanString:          return visit_terminal(a);
+    case LeanArray:           return visit_array(a);
+    default:                  return visit_ctor(a);
     }
-
-    lean_object * operator()(lean_object * a) {
-        return visit(a);
-    }
-};
+}
 
 // def ShareCommon.shareCommon' (a : A) : A := a
 extern "C" LEAN_EXPORT obj_res lean_sharecommon_quick(obj_arg a) {
diff --git a/src/runtime/sharecommon.h b/src/runtime/sharecommon.h
new file mode 100644
index 0000000000..1ac304182e
--- /dev/null
+++ b/src/runtime/sharecommon.h
@@ -0,0 +1,46 @@
+/*
+Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+
+Author: Leonardo de Moura
+*/
+#pragma once
+#include "runtime/object.h"
+
+namespace lean {
+extern "C" LEAN_EXPORT uint8 lean_sharecommon_eq(b_obj_arg o1, b_obj_arg o2);
+extern "C" LEAN_EXPORT uint64_t lean_sharecommon_hash(b_obj_arg o);
+
+/*
+A faster version of `sharecommon_fn` which only uses a local state.
+It optimizes the number of RC operations, the strategy for caching results,
+and uses C++ hashmap.
+*/
+class sharecommon_quick_fn {
+    struct set_hash {
+        std::size_t operator()(lean_object * o) const { return lean_sharecommon_hash(o); }
+    };
+    struct set_eq {
+        std::size_t operator()(lean_object * o1, lean_object * o2) const { return lean_sharecommon_eq(o1, o2); }
+    };
+
+    /*
+    We use `m_cache` to ensure we do **not** traverse a DAG as a tree.
+    We use pointer equality for this collection.
+    */
+    std::unordered_map<lean_object *, lean_object *> m_cache;
+    /* Set of maximally shared terms. AKA hash-consing table. */
+    std::unordered_set<lean_object *, set_hash, set_eq> m_set;
+
+    lean_object * check_cache(lean_object * a);
+    lean_object * save(lean_object * a, lean_object * new_a);
+    lean_object * visit_terminal(lean_object * a);
+    lean_object * visit_array(lean_object * a);
+    lean_object * visit_ctor(lean_object * a);
+    lean_object * visit(lean_object * a);
+public:
+    lean_object * operator()(lean_object * a) {
+        return visit(a);
+    }
+};
+};