Browse Source

Fix ringbuffer thread safety on ARM. Fix #715 #388 (#886)

* fix ringbuffer thread safety on ARM. fix #715 #388

This patch addresses the thread safety problem of `jack_ringbuffer_t`
mentioned in #715 and #388. The overbound read bug caused by this problem
is impossible to reproduce on x86 due to its strong memory ordering, but
it is a problem on ARM and other weakly ordered architectures.

Basically, the main problem is that, on a weakly ordered architecture,
it is possible that the pointer increment after `memcpy` becomes visible
to the other thread before `memcpy` finishes:

	memcpy (&(rb->buf[rb->write_ptr]), src, n1);
	// vvv can be visible to reading thread before memcpy finishes
	rb->write_ptr = (rb->write_ptr + n1) & rb->size_mask;

If this happens, the other thread can read the remaining garbage values
in `rb->buf` due to be overwritten by the unfinished `memcpy`.

To fix this, an explicit pair of release/acquire memory fences [1] is
used to ensure the copy on the other thread *happens after* the `memcpy`
finishes so no garbage values can be read.

[1]: https://preshing.com/20130922/acquire-and-release-fences/

* remove volatile qualifier on ringbuf r/w pointers

The volatile constraints are excess when compiler barriers are present.
It generates unnecessary `mov` instructions when pointers aren't going
to be updated.

* simplify read/write space calculations

This optimization is possible because the buffer size is always a power
of 2. See [1] for details.

[1]: https://github.com/drobilla/zix/pull/1#issuecomment-1212687196

* move acq fences to separate lines
tags/v1.9.22
K GitHub 2 years ago
parent
commit
75516ffe6d
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 37 deletions
  1. +2
    -2
      common/jack/ringbuffer.h
  2. +50
    -35
      common/ringbuffer.c

+ 2
- 2
common/jack/ringbuffer.h View File

@@ -50,8 +50,8 @@ jack_ringbuffer_data_t ;


typedef struct { typedef struct {
char *buf; char *buf;
volatile size_t write_ptr;
volatile size_t read_ptr;
size_t write_ptr;
size_t read_ptr;
size_t size; size_t size;
size_t size_mask; size_t size_mask;
int mlocked; int mlocked;


+ 50
- 35
common/ringbuffer.c View File

@@ -27,6 +27,35 @@
#endif /* USE_MLOCK */ #endif /* USE_MLOCK */
#include "JackCompilerDeps.h" #include "JackCompilerDeps.h"


/* Portable definitions for acquire and release fences */
#if defined(_MSC_VER)
#if defined(_M_AMD64) || defined(_M_IX86) || defined(_M_X64)
/* Only compiler fences are needed on x86. In fact, GCC
* will generate no instructions for acq/rel fences on
* x86 */
#include <intrin.h>
#define JACK_ACQ_FENCE() _ReadBarrier()
#define JACK_REL_FENCE() _WriteBarrier()
#else
/* Use full memory fence for non-x86 systems with msvc */
#include <windows.h>
#define JACK_ACQ_FENCE() MemoryBarrier()
#define JACK_REL_FENCE() MemoryBarrier()
#endif
#elif defined(__GNUC__)
#ifdef __ATOMIC_ACQUIRE
#define JACK_ACQ_FENCE() __atomic_thread_fence(__ATOMIC_ACQUIRE)
#define JACK_REL_FENCE() __atomic_thread_fence(__ATOMIC_RELEASE)
#else
/* Fallback to the legacy __sync builtin (full memory fence) */
#define JACK_ACQ_FENCE() __sync_synchronize()
#define JACK_REL_FENCE() __sync_synchronize()
#endif
#else
#define JACK_ACQ_FENCE()
#define JACK_REL_FENCE()
#endif

typedef struct { typedef struct {
char *buf; char *buf;
size_t len; size_t len;
@@ -35,8 +64,8 @@ jack_ringbuffer_data_t ;


typedef struct { typedef struct {
char *buf; char *buf;
volatile size_t write_ptr;
volatile size_t read_ptr;
size_t write_ptr;
size_t read_ptr;
size_t size; size_t size;
size_t size_mask; size_t size_mask;
int mlocked; int mlocked;
@@ -126,7 +155,7 @@ jack_ringbuffer_reset (jack_ringbuffer_t * rb)
{ {
rb->read_ptr = 0; rb->read_ptr = 0;
rb->write_ptr = 0; rb->write_ptr = 0;
memset(rb->buf, 0, rb->size);
memset(rb->buf, 0, rb->size);
} }


/* Reset the read and write pointers to zero. This is not thread /* Reset the read and write pointers to zero. This is not thread
@@ -152,13 +181,10 @@ jack_ringbuffer_read_space (const jack_ringbuffer_t * rb)
size_t w, r; size_t w, r;


w = rb->write_ptr; w = rb->write_ptr;
JACK_ACQ_FENCE();
r = rb->read_ptr; r = rb->read_ptr;


if (w > r) {
return w - r;
} else {
return (w - r + rb->size) & rb->size_mask;
}
return (w - r) & rb->size_mask;
} }


/* Return the number of bytes available for writing. This is the /* Return the number of bytes available for writing. This is the
@@ -172,14 +198,9 @@ jack_ringbuffer_write_space (const jack_ringbuffer_t * rb)


w = rb->write_ptr; w = rb->write_ptr;
r = rb->read_ptr; r = rb->read_ptr;
JACK_ACQ_FENCE();


if (w > r) {
return ((r - w + rb->size) & rb->size_mask) - 1;
} else if (w < r) {
return (r - w) - 1;
} else {
return rb->size - 1;
}
return (r - w - 1) & rb->size_mask;
} }


/* The copying data reader. Copy at most `cnt' bytes from `rb' to /* The copying data reader. Copy at most `cnt' bytes from `rb' to
@@ -199,6 +220,8 @@ jack_ringbuffer_read (jack_ringbuffer_t * rb, char *dest, size_t cnt)


to_read = cnt > free_cnt ? free_cnt : cnt; to_read = cnt > free_cnt ? free_cnt : cnt;


/* note: relaxed load here, rb->read_ptr cannot be
* modified from writing thread */
cnt2 = rb->read_ptr + to_read; cnt2 = rb->read_ptr + to_read;


if (cnt2 > rb->size) { if (cnt2 > rb->size) {
@@ -210,10 +233,12 @@ jack_ringbuffer_read (jack_ringbuffer_t * rb, char *dest, size_t cnt)
} }


memcpy (dest, &(rb->buf[rb->read_ptr]), n1); memcpy (dest, &(rb->buf[rb->read_ptr]), n1);
JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->read_ptr = (rb->read_ptr + n1) & rb->size_mask; rb->read_ptr = (rb->read_ptr + n1) & rb->size_mask;


if (n2) { if (n2) {
memcpy (dest + n1, &(rb->buf[rb->read_ptr]), n2); memcpy (dest + n1, &(rb->buf[rb->read_ptr]), n2);
JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->read_ptr = (rb->read_ptr + n2) & rb->size_mask; rb->read_ptr = (rb->read_ptr + n2) & rb->size_mask;
} }


@@ -278,6 +303,8 @@ jack_ringbuffer_write (jack_ringbuffer_t * rb, const char *src, size_t cnt)


to_write = cnt > free_cnt ? free_cnt : cnt; to_write = cnt > free_cnt ? free_cnt : cnt;


/* note: relaxed load here, rb->write_ptr cannot be
* modified from reading thread */
cnt2 = rb->write_ptr + to_write; cnt2 = rb->write_ptr + to_write;


if (cnt2 > rb->size) { if (cnt2 > rb->size) {
@@ -289,10 +316,12 @@ jack_ringbuffer_write (jack_ringbuffer_t * rb, const char *src, size_t cnt)
} }


memcpy (&(rb->buf[rb->write_ptr]), src, n1); memcpy (&(rb->buf[rb->write_ptr]), src, n1);
JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->write_ptr = (rb->write_ptr + n1) & rb->size_mask; rb->write_ptr = (rb->write_ptr + n1) & rb->size_mask;


if (n2) { if (n2) {
memcpy (&(rb->buf[rb->write_ptr]), src + n1, n2); memcpy (&(rb->buf[rb->write_ptr]), src + n1, n2);
JACK_REL_FENCE(); /* ensure pointer increment happens after copy */
rb->write_ptr = (rb->write_ptr + n2) & rb->size_mask; rb->write_ptr = (rb->write_ptr + n2) & rb->size_mask;
} }


@@ -305,6 +334,7 @@ LIB_EXPORT void
jack_ringbuffer_read_advance (jack_ringbuffer_t * rb, size_t cnt) jack_ringbuffer_read_advance (jack_ringbuffer_t * rb, size_t cnt)
{ {
size_t tmp = (rb->read_ptr + cnt) & rb->size_mask; size_t tmp = (rb->read_ptr + cnt) & rb->size_mask;
JACK_REL_FENCE(); /* ensure pointer increment happens after copy (by user) */
rb->read_ptr = tmp; rb->read_ptr = tmp;
} }


@@ -314,6 +344,7 @@ LIB_EXPORT void
jack_ringbuffer_write_advance (jack_ringbuffer_t * rb, size_t cnt) jack_ringbuffer_write_advance (jack_ringbuffer_t * rb, size_t cnt)
{ {
size_t tmp = (rb->write_ptr + cnt) & rb->size_mask; size_t tmp = (rb->write_ptr + cnt) & rb->size_mask;
JACK_REL_FENCE(); /* ensure pointer increment happens after copy (by user) */
rb->write_ptr = tmp; rb->write_ptr = tmp;
} }


@@ -328,17 +359,10 @@ jack_ringbuffer_get_read_vector (const jack_ringbuffer_t * rb,
{ {
size_t free_cnt; size_t free_cnt;
size_t cnt2; size_t cnt2;
size_t w, r;
size_t r;


w = rb->write_ptr;
r = rb->read_ptr; r = rb->read_ptr;

if (w > r) {
free_cnt = w - r;
} else {
free_cnt = (w - r + rb->size) & rb->size_mask;
}

free_cnt = jack_ringbuffer_read_space(rb);
cnt2 = r + free_cnt; cnt2 = r + free_cnt;


if (cnt2 > rb->size) { if (cnt2 > rb->size) {
@@ -372,19 +396,10 @@ jack_ringbuffer_get_write_vector (const jack_ringbuffer_t * rb,
{ {
size_t free_cnt; size_t free_cnt;
size_t cnt2; size_t cnt2;
size_t w, r;
size_t w;


w = rb->write_ptr; w = rb->write_ptr;
r = rb->read_ptr;

if (w > r) {
free_cnt = ((r - w + rb->size) & rb->size_mask) - 1;
} else if (w < r) {
free_cnt = (r - w) - 1;
} else {
free_cnt = rb->size - 1;
}

free_cnt = jack_ringbuffer_write_space(rb);
cnt2 = w + free_cnt; cnt2 = w + free_cnt;


if (cnt2 > rb->size) { if (cnt2 > rb->size) {


Loading…
Cancel
Save