From 559c3253070f8df014fd51adfa6c1c11ca35a417 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Wed, 12 Jun 2019 14:40:49 +0200 Subject: [PATCH] Emulate 64-bit atomic ops if hardware doesn't support them Signed-off-by: Erik Boasson --- src/ddsrt/include/dds/ddsrt/atomics.h | 28 +++ src/ddsrt/include/dds/ddsrt/atomics/gcc.h | 8 + src/ddsrt/src/atomics.c | 205 ++++++++++++++++++++++ src/ddsrt/src/cdtors.c | 2 + 4 files changed, 243 insertions(+) diff --git a/src/ddsrt/include/dds/ddsrt/atomics.h b/src/ddsrt/include/dds/ddsrt/atomics.h index 5acbed4..294085e 100644 --- a/src/ddsrt/include/dds/ddsrt/atomics.h +++ b/src/ddsrt/include/dds/ddsrt/atomics.h @@ -59,6 +59,34 @@ typedef ddsrt_atomic_uintptr_t ddsrt_atomic_voidp_t; #error "Atomic operations are not supported" #endif +#if ! DDSRT_HAVE_ATOMIC64 +/* 64-bit atomics are not supported by all hardware, but it would be a shame not to use them when + they are available. That necessitates an alternative implementation when they are not, either in + the form of a different implementation where it is used, or as an emulation using a mutex in + ddsrt. It seems that the places where they'd be used end up adding a mutex, so an emulation in + ddsrt while being able to check whether it is supported by hardware is a sensible approach. 
*/ +DDS_EXPORT uint64_t ddsrt_atomic_ld64 (const volatile ddsrt_atomic_uint64_t *x); +DDS_EXPORT void ddsrt_atomic_st64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT void ddsrt_atomic_inc64 (volatile ddsrt_atomic_uint64_t *x); +DDS_EXPORT uint64_t ddsrt_atomic_inc64_nv (volatile ddsrt_atomic_uint64_t *x); /* _nv variants return the value after the update */ +DDS_EXPORT void ddsrt_atomic_dec64 (volatile ddsrt_atomic_uint64_t *x); +DDS_EXPORT uint64_t ddsrt_atomic_dec64_nv (volatile ddsrt_atomic_uint64_t *x); +DDS_EXPORT void ddsrt_atomic_add64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT uint64_t ddsrt_atomic_add64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT void ddsrt_atomic_sub64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT uint64_t ddsrt_atomic_sub64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT void ddsrt_atomic_and64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT uint64_t ddsrt_atomic_and64_ov (volatile ddsrt_atomic_uint64_t *x, uint64_t v); /* _ov variants return the value before the update */ +DDS_EXPORT uint64_t ddsrt_atomic_and64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT void ddsrt_atomic_or64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT uint64_t ddsrt_atomic_or64_ov (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT uint64_t ddsrt_atomic_or64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v); +DDS_EXPORT int ddsrt_atomic_cas64 (volatile ddsrt_atomic_uint64_t *x, uint64_t exp, uint64_t des); /* returns true if x held exp (x is then set to des), false otherwise */ +#endif + +void ddsrt_atomics_init (void); /* initialises the emulation mutexes; empty body when DDSRT_HAVE_ATOMIC64 is set */ +void ddsrt_atomics_fini (void); /* destroys the emulation mutexes; empty body when DDSRT_HAVE_ATOMIC64 is set */ + #if defined(__cplusplus) } #endif diff --git a/src/ddsrt/include/dds/ddsrt/atomics/gcc.h b/src/ddsrt/include/dds/ddsrt/atomics/gcc.h index a567e81..59e05a7 100644 --- a/src/ddsrt/include/dds/ddsrt/atomics/gcc.h +++ b/src/ddsrt/include/dds/ddsrt/atomics/gcc.h @@ -318,10 +318,18 @@ inline void ddsrt_atomic_fence_ldld (void) { #endif } inline void ddsrt_atomic_fence_acq (void) { +#if !(defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined 
_M_X64) ddsrt_atomic_fence (); +#else + asm volatile ("" ::: "memory"); /* x86 branch: empty instruction template with a memory clobber, used instead of ddsrt_atomic_fence. NOTE(review): presumably relies on the x86 memory ordering rules making a hardware fence unnecessary for acquire - confirm */ +#endif } inline void ddsrt_atomic_fence_rel (void) { +#if !(defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64) ddsrt_atomic_fence (); +#else + asm volatile ("" ::: "memory"); /* x86 branch: same empty asm with a memory clobber as in ddsrt_atomic_fence_acq */ +#endif } #if defined (__cplusplus) diff --git a/src/ddsrt/src/atomics.c b/src/ddsrt/src/atomics.c index bc87a7b..5919a40 100644 --- a/src/ddsrt/src/atomics.c +++ b/src/ddsrt/src/atomics.c @@ -161,3 +161,208 @@ void ddsrt_atomic_lifo_pushmany (ddsrt_atomic_lifo_t *head, void *first, void *l } while (!ddsrt_atomic_casvoidp2 (&head->aba_head, a0, b0, a0+1, (uintptr_t)first)); } #endif + +#if DDSRT_HAVE_ATOMIC64 +void ddsrt_atomics_init (void) +{ +} + +void ddsrt_atomics_fini (void) +{ +} + +#else + +/* Emulation by hashing the variable's address to a small set of mutexes. Every emulated operation below locks the mutex chosen by atomic64_lock_index for the address of x, accesses x->v, and unlocks again before returning. */ +#include "dds/ddsrt/sync.h" + +#define N_MUTEXES_LG2 4 /* base-2 logarithm of the number of mutexes */ +#define N_MUTEXES (1 << N_MUTEXES_LG2) /* 16 mutexes */ +static ddsrt_mutex_t mutexes[N_MUTEXES]; + +void ddsrt_atomics_init (void) +{ + for (int i = 0; i < N_MUTEXES; i++) + ddsrt_mutex_init (&mutexes[i]); +} + +void ddsrt_atomics_fini (void) +{ + for (int i = 0; i < N_MUTEXES; i++) + ddsrt_mutex_destroy (&mutexes[i]); +} + +static uint32_t atomic64_lock_index (const volatile ddsrt_atomic_uint64_t *x) +{ + const uint32_t u = (uint16_t) ((uintptr_t) x >> 3); /* discard the low 3 address bits, keep the next 16 */ + const uint32_t v = u * 0xb4817365; /* scramble by multiplication; the result is kept to 32 bits */ + return v >> (32 - N_MUTEXES_LG2); /* the top N_MUTEXES_LG2 bits form an index in the range 0 .. N_MUTEXES-1 */ +} + +int ddsrt_atomic_cas64 (volatile ddsrt_atomic_uint64_t *x, uint64_t exp, uint64_t des) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + if (x->v == exp) /* the comparison and the store both happen while the mutex is held */ + { + x->v = des; + ddsrt_mutex_unlock (&mutexes[idx]); + return true; + } + else + { + ddsrt_mutex_unlock (&mutexes[idx]); + return false; + } +} + +uint64_t ddsrt_atomic_ld64(const volatile ddsrt_atomic_uint64_t *x) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t v = x->v; /* read under the same mutex that the emulated updates of x take */ + ddsrt_mutex_unlock (&mutexes[idx]); 
+ return v; +} + +void ddsrt_atomic_st64(volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + x->v = v; + ddsrt_mutex_unlock (&mutexes[idx]); +} + +void ddsrt_atomic_inc64 (volatile ddsrt_atomic_uint64_t *x) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + ++x->v; + ddsrt_mutex_unlock (&mutexes[idx]); +} + +uint64_t ddsrt_atomic_inc64_nv (volatile ddsrt_atomic_uint64_t *x) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t nv = ++x->v; /* value after the increment */ + ddsrt_mutex_unlock (&mutexes[idx]); + return nv; +} + +void ddsrt_atomic_dec64 (volatile ddsrt_atomic_uint64_t *x) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + --x->v; + ddsrt_mutex_unlock (&mutexes[idx]); +} + +uint64_t ddsrt_atomic_dec64_nv (volatile ddsrt_atomic_uint64_t *x) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t nv = --x->v; /* value after the decrement */ + ddsrt_mutex_unlock (&mutexes[idx]); + return nv; +} + +void ddsrt_atomic_add64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + x->v += v; + ddsrt_mutex_unlock (&mutexes[idx]); +} + +uint64_t ddsrt_atomic_add64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t ov = x->v; + const uint64_t nv = ov + v; + x->v = nv; + ddsrt_mutex_unlock (&mutexes[idx]); + return nv; +} + +void ddsrt_atomic_sub64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + x->v -= v; + ddsrt_mutex_unlock (&mutexes[idx]); +} + +uint64_t ddsrt_atomic_sub64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock 
(&mutexes[idx]); + const uint64_t ov = x->v; + const uint64_t nv = ov - v; + x->v = nv; + ddsrt_mutex_unlock (&mutexes[idx]); + return nv; +} + +void ddsrt_atomic_and64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + x->v &= v; + ddsrt_mutex_unlock (&mutexes[idx]); +} + +uint64_t ddsrt_atomic_and64_ov (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t ov = x->v; + const uint64_t nv = ov & v; + x->v = nv; + ddsrt_mutex_unlock (&mutexes[idx]); + return ov; /* value before the AND */ +} + +uint64_t ddsrt_atomic_and64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t ov = x->v; + const uint64_t nv = ov & v; + x->v = nv; + ddsrt_mutex_unlock (&mutexes[idx]); + return nv; /* value after the AND */ +} + +void ddsrt_atomic_or64 (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + x->v |= v; + ddsrt_mutex_unlock (&mutexes[idx]); +} + +uint64_t ddsrt_atomic_or64_ov (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t ov = x->v; + const uint64_t nv = ov | v; + x->v = nv; + ddsrt_mutex_unlock (&mutexes[idx]); + return ov; /* value before the OR */ +} + +uint64_t ddsrt_atomic_or64_nv (volatile ddsrt_atomic_uint64_t *x, uint64_t v) +{ + const uint32_t idx = atomic64_lock_index (x); + ddsrt_mutex_lock (&mutexes[idx]); + const uint64_t ov = x->v; + const uint64_t nv = ov | v; + x->v = nv; + ddsrt_mutex_unlock (&mutexes[idx]); + return nv; /* value after the OR */ +} + +#endif /* DDSRT_HAVE_ATOMIC64 */ diff --git a/src/ddsrt/src/cdtors.c b/src/ddsrt/src/cdtors.c index de63e32..e184098 100644 --- a/src/ddsrt/src/cdtors.c +++ b/src/ddsrt/src/cdtors.c @@ -43,6 +43,7 @@ retry: ddsrt_time_init(); #endif 
ddsrt_random_init(); + ddsrt_atomics_init(); /* sets up the mutexes behind the emulated 64-bit atomics; empty function when DDSRT_HAVE_ATOMIC64 is set */ ddsrt_atomic_or32(&init_status, INIT_STATUS_OK); } else { while (v > 1 && !(v & INIT_STATUS_OK)) { @@ -68,6 +69,7 @@ void ddsrt_fini (void) { ddsrt_mutex_destroy(&init_mutex); ddsrt_random_fini(); + ddsrt_atomics_fini(); /* destroys the mutexes behind the emulated 64-bit atomics; empty function when DDSRT_HAVE_ATOMIC64 is set */ #if _WIN32 ddsrt_winsock_fini(); ddsrt_time_fini();