From 36dc037d654e0e1909fc9bfb8c7665dfcfbfd8ae Mon Sep 17 00:00:00 2001 From: Gabriel Ebner Date: Tue, 25 Jul 2017 08:26:46 +0100 Subject: [PATCH] perf(util/rc): do not use fences As suggested by David Chisnall. If I read the spec correctly, it would be unsafe to use the release ordering for dec_ref_core: then the following situation could happen: ```c++ // m_rc -> 2 // Thread 1: unsigned x = atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release); // Thread 2: unsigned y = atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release); // x = y = 1u, m_rc -> 2 ``` That is, a release store operation is not required to be visible to another release operation. Herb Sutter also recommends the acq_rel ordering for reference counters. --- src/util/rc.h | 5 ++--- src/util/thread.h | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/util/rc.h b/src/util/rc.h index 574e3a1e32..5f771504f0 100644 --- a/src/util/rc.h +++ b/src/util/rc.h @@ -14,12 +14,11 @@ Author: Leonardo de Moura private: \ atomic m_rc; \ public: \ -unsigned get_rc() const { return atomic_load(&m_rc); } \ +unsigned get_rc() const { return atomic_load_explicit(&m_rc, memory_order_acquire); } \ void inc_ref() { atomic_fetch_add_explicit(&m_rc, 1u, memory_order_relaxed); } \ bool dec_ref_core() { \ lean_assert(get_rc() > 0); \ - if (atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release) == 1u) { \ - atomic_thread_fence(memory_order_acquire); \ + if (atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_acq_rel) == 1u) { \ return true; \ } else { \ return false; \ diff --git a/src/util/thread.h b/src/util/thread.h index e8b8206423..51bbaff1cb 100644 --- a/src/util/thread.h +++ b/src/util/thread.h @@ -37,11 +37,13 @@ using std::condition_variable; using std::lock_guard; using std::unique_lock; using std::atomic_load; +using std::atomic_load_explicit; using std::atomic_fetch_add_explicit; using std::atomic_fetch_sub_explicit; using std::memory_order_relaxed; using std::memory_order_release; using std::memory_order_acquire; +using std::memory_order_acq_rel; using std::memory_order_seq_cst; using std::atomic_thread_fence; namespace this_thread = std::this_thread; @@ -70,6 +72,7 @@ namespace lean { constexpr int memory_order_relaxed = 0; constexpr int memory_order_release = 0; constexpr int memory_order_acquire = 0; +constexpr int memory_order_acq_rel = 0; constexpr int memory_order_seq_cst = 0; inline void atomic_thread_fence(int ) {} template @@ -95,6 +98,7 @@ public: atomic & operator--() { --m_value; return *this; } atomic operator--(int ) { atomic tmp(*this); --m_value; return tmp; } friend T atomic_load(atomic const * a) { return a->m_value; } + friend T atomic_load_explicit(atomic const * a, int) { return a->m_value; } friend T atomic_fetch_add_explicit(atomic * a, T const & v, int ) { T r(a->m_value); a->m_value += v; return r; } friend T atomic_fetch_sub_explicit(atomic * a, T const & v, int ) { T r(a->m_value); a->m_value -= v; return r; } T exchange(T desired) { T old = m_value; m_value = desired; return old; }