perf(util/rc): do not use fences

As suggested by David Chisnall.  If I read the spec correctly, it would
be unsafe to use the release ordering for dec_ref_core: then the
following situation could happen:

```c++
// m_rc -> 2

// Thread 1:
unsigned x = atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release);

// Thread 2:
unsigned y = atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release);

// x = y = 1u, m_rc -> 2
```

That is, a release store operation is not required to be visible to
another release operation.

Herb Sutter also recommends the acq_rel ordering for reference counters.
This commit is contained in:
Gabriel Ebner 2017-07-25 08:26:46 +01:00
parent 1b28b9f5bd
commit 36dc037d65
2 changed files with 6 additions and 3 deletions

View file

@ -14,12 +14,11 @@ Author: Leonardo de Moura
private: \
atomic<unsigned> m_rc; \
public: \
unsigned get_rc() const { return atomic_load(&m_rc); } \
unsigned get_rc() const { return atomic_load_explicit(&m_rc, memory_order_acquire); } \
void inc_ref() { atomic_fetch_add_explicit(&m_rc, 1u, memory_order_relaxed); } \
bool dec_ref_core() { \
lean_assert(get_rc() > 0); \
if (atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_release) == 1u) { \
atomic_thread_fence(memory_order_acquire); \
if (atomic_fetch_sub_explicit(&m_rc, 1u, memory_order_acq_rel) == 1u) { \
return true; \
} else { \
return false; \

View file

@ -37,11 +37,13 @@ using std::condition_variable;
using std::lock_guard;
using std::unique_lock;
using std::atomic_load;
using std::atomic_load_explicit;
using std::atomic_fetch_add_explicit;
using std::atomic_fetch_sub_explicit;
using std::memory_order_relaxed;
using std::memory_order_release;
using std::memory_order_acquire;
using std::memory_order_acq_rel;
using std::memory_order_seq_cst;
using std::atomic_thread_fence;
namespace this_thread = std::this_thread;
@ -70,6 +72,7 @@ namespace lean {
constexpr int memory_order_relaxed = 0;
constexpr int memory_order_release = 0;
constexpr int memory_order_acquire = 0;
constexpr int memory_order_acq_rel = 0;
constexpr int memory_order_seq_cst = 0;
inline void atomic_thread_fence(int ) {}
template<typename T>
@ -95,6 +98,7 @@ public:
atomic & operator--() { --m_value; return *this; }
atomic operator--(int ) { atomic tmp(*this); --m_value; return tmp; }
friend T atomic_load(atomic const * a) { return a->m_value; }
friend T atomic_load_explicit(atomic const * a, int) { return a->m_value; }
friend T atomic_fetch_add_explicit(atomic * a, T const & v, int ) { T r(a->m_value); a->m_value += v; return r; }
friend T atomic_fetch_sub_explicit(atomic * a, T const & v, int ) { T r(a->m_value); a->m_value -= v; return r; }
T exchange(T desired) { T old = m_value; m_value = desired; return old; }