[PATCH] fix incorrect memory fence usage in zix ringbuf
Export this patch
The original implementation seems to be misguided by the naming of
barrier macros. The barrier before read pointer increment should be a
release fence instead of an acquire fence,
- ZIX_READ_BARRIER();
+ ZIX_WRITE_BARRIER();
ring->read_head = (r + size) & ring->size_mask;
or otherwise the fence cannot prevent store-store reordering [1], i.e.
stores happens before pointer increment might be visible after the
increment to the other thread. I also added the missing acquire fences
when loading pointer from other thread.
I don't think this bug would lead to a major problem, since it only
affects read operations, which does not store to the shared buffer, but
since memory ordering problems are almost impossible to debug, it's
better to be safe here.
Also, on x86, acquire/release fences should be no-ops [2]. Using full
memory fences on x86 msvc brings unnecessary syncing overhead. They
should be only enabled for ARM.
[1]: https://preshing.com/20130922/acquire-and-release-fences/
[2]: https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Signed-off-by: Yu Fang <o@krj.st>
---
ext/zix/zix/ring.c | 36 +++++++++++++++++++++++++ -----------
1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/ext/zix/zix/ring.c b/ext/zix/zix/ring.c
index 10ac63dc4..b7c9562f4 100644
--- a/ext/zix/zix/ring.c
+++ b/ext/zix/zix/ring.c
@@ -31,9 +31,17 @@
#endif
#if defined(_MSC_VER)
- # include <windows.h>
- # define ZIX_READ_BARRIER() MemoryBarrier()
- # define ZIX_WRITE_BARRIER() MemoryBarrier()
+ # if defined(_M_AMD64) || defined(_M_IX86) || defined(_M_X64)
+ /* acquire and release fences are only necessary for
+ * non-x86 systems. In fact, gcc will generate no
+ * instructions for acq/rel fences on x86. */
+ # define ZIX_READ_BARRIER()
+ # define ZIX_WRITE_BARRIER()
+ # else
+ # include <windows.h>
+ # define ZIX_READ_BARRIER() MemoryBarrier()
+ # define ZIX_WRITE_BARRIER() MemoryBarrier()
+ # endif
#elif defined(__GNUC__)
# define ZIX_READ_BARRIER() __atomic_thread_fence(__ATOMIC_ACQUIRE)
# define ZIX_WRITE_BARRIER() __atomic_thread_fence(__ATOMIC_RELEASE)
@@ -115,7 +123,9 @@ read_space_internal(const ZixRing* ring, uint32_t r, uint32_t w)
uint32_t
zix_ring_read_space(const ZixRing* ring)
{
- return read_space_internal(ring, ring->read_head, ring->write_head);
+ const uint32_t r = ring->read_head;
+ const uint32_t w = ring->write_head; ZIX_READ_BARRIER();
+ return read_space_internal(ring, r, w);
}
static inline uint32_t
@@ -135,7 +145,9 @@ write_space_internal(const ZixRing* ring, uint32_t r, uint32_t w)
uint32_t
zix_ring_write_space(const ZixRing* ring)
{
- return write_space_internal(ring, ring->read_head, ring->write_head);
+ const uint32_t r = ring->read_head; ZIX_READ_BARRIER();
+ const uint32_t w = ring->write_head;
+ return write_space_internal(ring, r, w);
}
uint32_t
@@ -169,17 +181,19 @@ peek_internal(const ZixRing* ring,
uint32_t
zix_ring_peek(ZixRing* ring, void* dst, uint32_t size)
{
- return peek_internal(ring, ring->read_head, ring->write_head, size, dst);
+ const uint32_t r = ring->read_head;
+ const uint32_t w = ring->write_head; ZIX_READ_BARRIER();
+ return peek_internal(ring, r, w, size, dst);
}
uint32_t
zix_ring_read(ZixRing* ring, void* dst, uint32_t size)
{
const uint32_t r = ring->read_head;
- const uint32_t w = ring->write_head;
+ const uint32_t w = ring->write_head; ZIX_READ_BARRIER();
if (peek_internal(ring, r, w, size, dst)) {
- ZIX_READ_BARRIER();
+ ZIX_WRITE_BARRIER();
ring->read_head = (r + size) & ring->size_mask;
return size;
}
@@ -191,12 +205,12 @@ uint32_t
zix_ring_skip(ZixRing* ring, uint32_t size)
{
const uint32_t r = ring->read_head;
- const uint32_t w = ring->write_head;
+ const uint32_t w = ring->write_head; ZIX_READ_BARRIER();
if (read_space_internal(ring, r, w) < size) {
return 0;
}
- ZIX_READ_BARRIER();
+ ZIX_WRITE_BARRIER();
ring->read_head = (r + size) & ring->size_mask;
return size;
}
@@ -204,7 +218,7 @@ zix_ring_skip(ZixRing* ring, uint32_t size)
uint32_t
zix_ring_write(ZixRing* ring, const void* src, uint32_t size)
{
- const uint32_t r = ring->read_head;
+ const uint32_t r = ring->read_head; ZIX_READ_BARRIER();
const uint32_t w = ring->write_head;
if (write_space_internal(ring, r, w) < size) {
return 0;
--
2.37.1
Thanks for the patch but I've now dropped the vendored version of zix
and started using the upstream library
https://github.com/drobilla/zix/tree/atomic-ring
It should be fixed in the atomic-ring branch (see this commit:
https://github.com/drobilla/zix/commit/a286841ea2b93ca0f5163f66d897006796e0ccff
).