perf(util/rc): do not use fences

As suggested by David Chisnall. If I read the spec correctly, it would be unsafe to use the release ordering for dec_ref_core: then the following situation could happen: ```c++ // m_rc -> 2 // Thread 1: unsigned x = atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release); // Thread 2: unsigned y = atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release); // x = y = 1u, m_rc -> 2 ``` That is, a release store operation is not required to be visible to another release operation. Herb Sutter also recommends the acq_rel ordering for reference counters.
2017-07-25 08:26:46 +01:00 · 2017-07-25 08:26:46 +01:00 · 36dc037d65
commit 36dc037d65
parent 1b28b9f5bd
2 changed files with 6 additions and 3 deletions
--- a/src/util/rc.h
+++ b/src/util/rc.h
@ -14,12 +14,11 @@ Author: Leonardo de Moura
 private:                                                                \
 atomic<unsigned> m_rc;                                                  \
 public:                                                                 \
-unsigned get_rc() const { return atomic_load(&m_rc); }                  \
+unsigned get_rc() const { return atomic_load_explicit(&m_rc, memory_order_acquire); } \
 void inc_ref() { atomic_fetch_add_explicit(&m_rc, 1u, memory_order_relaxed); } \
 bool dec_ref_core() {                                                   \
    lean_assert(get_rc() > 0);                                          \
-    if (atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release) == 1u) { \
-        atomic_thread_fence(memory_order_acquire);                      \
+    if (atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_acq_rel) == 1u) { \
        return true;                                                    \
    } else {                                                            \
        return false;                                                   \
--- a/src/util/thread.h
+++ b/src/util/thread.h
@ -37,11 +37,13 @@ using std::condition_variable;
 using std::lock_guard;
 using std::unique_lock;
 using std::atomic_load;
+using std::atomic_load_explicit;
 using std::atomic_fetch_add_explicit;
 using std::atomic_fetch_sub_explicit;
 using std::memory_order_relaxed;
 using std::memory_order_release;
 using std::memory_order_acquire;
+using std::memory_order_acq_rel;
 using std::memory_order_seq_cst;
 using std::atomic_thread_fence;
 namespace this_thread = std::this_thread;
@ -70,6 +72,7 @@ namespace lean {
 constexpr int memory_order_relaxed = 0;
 constexpr int memory_order_release = 0;
 constexpr int memory_order_acquire = 0;
+constexpr int memory_order_acq_rel = 0;
 constexpr int memory_order_seq_cst = 0;
 inline void atomic_thread_fence(int ) {}
 template<typename T>
@ -95,6 +98,7 @@ public:
    atomic & operator--() { --m_value; return *this; }
    atomic   operator--(int ) { atomic tmp(*this); --m_value; return tmp; }
    friend T atomic_load(atomic const * a) { return a->m_value; }
+    friend T atomic_load_explicit(atomic const * a, int) { return a->m_value; }
    friend T atomic_fetch_add_explicit(atomic * a, T const & v, int ) { T r(a->m_value); a->m_value += v; return r; }
    friend T atomic_fetch_sub_explicit(atomic * a, T const & v, int ) { T r(a->m_value); a->m_value -= v; return r; }
    T exchange(T desired) { T old = m_value; m_value = desired; return old; }