Browse Source

Refactor all Barrier implementations. Explicitly use appropriate memory ordering everywhere. Use __yield() on ARM64.

tags/v2.6.1
Andrew Belt 1 month ago
parent
commit
617f6ddafd
1 changed file with 69 additions and 52 deletions
  1. +69
    -52
      src/engine/Engine.cpp

+ 69
- 52
src/engine/Engine.cpp View File

@@ -21,70 +21,79 @@ namespace rack {
namespace engine {




/** Briefly relaxes the CPU inside a spin-wait loop.
Uses the architecture's "pause"/"yield" hint so a spinning hardware thread
frees pipeline resources and reduces power draw; a no-op on other targets.
*/
inline void cpuPause() {
#if defined ARCH_X64
	_mm_pause();
#elif defined ARCH_ARM64
	__yield();
#endif
}


/** Multiple-phase barrier based on C++ mutexes, as a reference.
All threads calling wait() block until `threads` threads have arrived, then all
are released and the barrier is reusable for the next phase.
*/
struct Barrier {
	// Number of threads that must arrive before a phase completes.
	size_t threads = 0;
	// Threads currently waiting in this phase.
	size_t count = 0;
	// Monotonic phase counter; waiters watch it change to know they may exit.
	size_t phase = 0;

	std::mutex mutex;
	std::condition_variable cv;

	/** Must be called when no threads are calling wait(). */
	void setThreads(size_t threads) {
		this->threads = threads;
	}

	/** Blocks until all `threads` threads have called wait() for this phase. */
	void wait() {
		std::unique_lock<std::mutex> lock(mutex);
		size_t currentPhase = phase;

		// Check if we're the last thread.
		if (++count >= threads) {
			// Advance phase and reset count
			count = 0;
			phase++;
			// Notify all other threads
			cv.notify_all();
			return;
		}

		// Unlock and wait on phase to change
		cv.wait(lock, [&] {
			return phase != currentPhase;
		});
	}
};




/** 2-phase barrier based on spin-locking.
/** Multiple-phase barrier based on spin-locking.
*/ */
struct SpinBarrier { struct SpinBarrier {
std::atomic<int> count{0};
std::atomic<uint8_t> step{0};
int threads = 0;
size_t threads = 0;
std::atomic<size_t> count{0};
std::atomic<size_t> phase{0};


/** Must be called when no threads are calling wait().
*/
void setThreads(int threads) {
void setThreads(size_t threads) {
this->threads = threads; this->threads = threads;
} }


void wait() { void wait() {
uint8_t s = step;
if (count.fetch_add(1, std::memory_order_acquire) + 1 >= threads) {
// We're the last thread. Reset next phase.
count = 0;
// Allow other threads to exit wait()
step++;
size_t currentPhase = phase.load(std::memory_order_acquire);

if (count.fetch_add(1, std::memory_order_acq_rel) + 1 >= threads) {
// Reset count
count.store(0, std::memory_order_release);
// Advance phase, which notifies all other threads, which are all spinning
phase.fetch_add(1, std::memory_order_release);
return; return;
} }


// Spin until the last thread begins waiting
// Spin until the phase is changed by the last thread
while (true) { while (true) {
if (step.load(std::memory_order_relaxed) != s)
if (phase.load(std::memory_order_acquire) != currentPhase)
return; return;
#if defined ARCH_X64
__builtin_ia32_pause();
#endif
// std::this_thread::yield();
cpuPause();
} }
} }
}; };
@@ -95,51 +104,59 @@ yield() should be called if it is likely that all threads will block for a while.
Saves CPU power after yield is called.
*/
struct HybridBarrier { struct HybridBarrier {
std::atomic<int> count{0};
std::atomic<uint8_t> step{0};
int threads = 0;

size_t threads = 0;
std::atomic<size_t> count{0};
std::atomic<size_t> phase{0};
std::atomic<bool> yielded{false}; std::atomic<bool> yielded{false};

std::mutex mutex; std::mutex mutex;
std::condition_variable cv; std::condition_variable cv;


void setThreads(int threads) {
void setThreads(size_t threads) {
this->threads = threads; this->threads = threads;
} }


void yield() { void yield() {
yielded = true;
yielded.store(true, std::memory_order_release);
} }


void wait() { void wait() {
uint8_t s = step;
if (count.fetch_add(1, std::memory_order_acquire) + 1 >= threads) {
// We're the last thread. Reset next phase.
count = 0;
bool wasYielded = yielded;
yielded = false;
// Allow other threads to exit wait()
step++;
if (wasYielded) {
size_t currentPhase = phase.load(std::memory_order_acquire);
// Check if we're the last thread
if (count.fetch_add(1, std::memory_order_acq_rel) + 1 >= threads) {
// Reset count
count.store(0, std::memory_order_release);
// If yielded, advance phase and notify all other threads
if (yielded.load(std::memory_order_acquire)) {
std::unique_lock<std::mutex> lock(mutex); std::unique_lock<std::mutex> lock(mutex);
yielded.store(false, std::memory_order_release);
phase.fetch_add(1, std::memory_order_release);
cv.notify_all(); cv.notify_all();
return;
} }

// Advance phase, which notifies all other threads, which are all spinning
phase.fetch_add(1, std::memory_order_release);
return; return;
} }


// Spin until the last thread begins waiting
while (!yielded.load(std::memory_order_relaxed)) {
if (step.load(std::memory_order_relaxed) != s)
// Spin until the phase is changed by the last thread, or yield() is called
while (true) {
if (phase.load(std::memory_order_acquire) != currentPhase)
return; return;
#if defined ARCH_X64
__builtin_ia32_pause();
#endif
if (yielded.load(std::memory_order_acquire))
break;
// std::this_thread::yield();
cpuPause();
} }


// Wait on mutex CV

// yield() was called, so use cv to wait on phase to be changed by the last thread
std::unique_lock<std::mutex> lock(mutex); std::unique_lock<std::mutex> lock(mutex);
cv.wait(lock, [&] { cv.wait(lock, [&] {
return step != s;
return phase.load(std::memory_order_acquire) != currentPhase;
}); });
} }
}; };


Loading…
Cancel
Save