// Barrier primitives from src/engine/Engine.cpp.

/** Barrier based on mutexes.
Not finished or tested, do not use.

Every participating thread calls wait(). The call returns once `threads` threads have arrived in the current phase.
`step` is bumped once per completed phase so that waiters can tell a real release from a spurious wakeup.
*/
struct Barrier {
	/** Number of threads that have arrived in the current phase. Guarded by `mutex`. */
	int count = 0;
	/** Phase counter, incremented by the last thread to arrive. Wraps around, only compared for inequality. Guarded by `mutex`. */
	uint8_t step = 0;
	/** Number of arrivals required to complete a phase. */
	int threads = 0;

	std::mutex mutex;
	std::condition_variable cv;

	/** Sets the number of threads that must call wait() to complete a phase.
	Written without taking `mutex`.
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Blocks until `threads` threads have called wait() in this phase.
	Returns immediately if `threads` is 1 or less.
	*/
	void wait() {
		// Template argument spelled out so this does not rely on C++17 class template argument deduction.
		std::unique_lock<std::mutex> lock(mutex);
		// Remember the phase we arrived in, so the predicate below ignores spurious wakeups.
		const uint8_t s = step;
		if (++count >= threads) {
			// We're the last thread. Reset next phase.
			count = 0;
			// Allow other threads to exit wait()
			step++;
			cv.notify_all();
			return;
		}

		cv.wait(lock, [&] {
			return step != s;
		});
	}
};


/** 2-phase barrier based on spin-locking.

Every participating thread calls wait(). Threads busy-wait until the last one arrives and bumps `step`.
Writes made by any thread before its wait() are visible to every thread after wait() returns.
*/
struct SpinBarrier {
	/** Number of threads that have arrived in the current phase. */
	std::atomic<int> count{0};
	/** Phase counter, incremented by the last thread to arrive. Wraps around, only compared for inequality. */
	std::atomic<uint8_t> step{0};
	/** Number of arrivals required to complete a phase. */
	int threads = 0;

	/** Must be called when no threads are calling wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Spins until `threads` threads have called wait() in this phase.
	Returns immediately if `threads` is 1 or less.
	*/
	void wait() {
		// Sample the phase before arriving. `step` cannot change until we have arrived, since we are one of the threads the phase is waiting for.
		const uint8_t s = step.load();
		// acq_rel: the release half publishes this thread's prior writes to whichever thread arrives last,
		// the acquire half lets the last thread observe the writes of everyone who arrived before it.
		if (count.fetch_add(1, std::memory_order_acq_rel) + 1 >= threads) {
			// We're the last thread. Reset next phase.
			count.store(0);
			// Allow other threads to exit wait()
			step.fetch_add(1);
			return;
		}

		// Spin until the last thread begins waiting.
		// Acquire pairs with the step increment above, so everything written before the barrier is visible after it.
		while (step.load(std::memory_order_acquire) == s) {
			cpuRelax();
		}
	}

	/** Spin-wait hint for the CPU. No-op on architectures without a known hint instruction.
	*/
	static void cpuRelax() noexcept {
#if defined(__x86_64__) || defined(__i386__)
		__builtin_ia32_pause();
#elif defined(__aarch64__)
		__asm__ __volatile__("yield");
#endif
	}
};
/** Barrier that spin-locks until yield() is called, and then all threads switch to a mutex.
yield() should be called if it is likely that all threads will block for a while and continuing to spin-lock is unnecessary.
Saves CPU power after yield is called.

The yielded state is cleared by the last thread to arrive in a phase.
A yield() that races with the end of a phase may carry over into the next phase; this only makes that phase wait on the mutex instead of spinning.
*/
struct HybridBarrier {
	/** Number of threads that have arrived in the current phase. */
	std::atomic<int> count{0};
	/** Phase counter, incremented by the last thread to arrive. Wraps around, only compared for inequality. */
	std::atomic<uint8_t> step{0};
	/** Number of arrivals required to complete a phase. */
	int threads = 0;

	/** True while waiters should block on `cv` instead of spinning. */
	std::atomic<bool> yielded{false};
	std::mutex mutex;
	std::condition_variable cv;

	/** Sets the number of threads that must call wait() to complete a phase.
	Not synchronized with wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Asks spinning waiters to block on the mutex/condition variable instead.
	*/
	void yield() {
		yielded.store(true);
	}

	/** Waits until `threads` threads have called wait() in this phase.
	Spins while not yielded, otherwise blocks on the condition variable.
	Returns immediately if `threads` is 1 or less.
	*/
	void wait() {
		// Sample the phase before arriving. `step` cannot change until we have arrived, since we are one of the threads the phase is waiting for.
		const uint8_t s = step.load();
		// acq_rel: publish this thread's prior writes to the last arriver, and let the last arriver observe everyone else's.
		if (count.fetch_add(1, std::memory_order_acq_rel) + 1 >= threads) {
			// We're the last thread. Reset next phase.
			count.store(0);
			// Read and clear in one step so a concurrent yield() is never silently overwritten.
			const bool wasYielded = yielded.exchange(false);
			// Allow other threads to exit wait()
			step.fetch_add(1);
			// Re-check `yielded`: a yield() that landed after the exchange may already have sent a waiter to the
			// condition variable. Without a notification that waiter would sleep forever.
			if (wasYielded || yielded.load()) {
				// Taking the mutex orders this notification after any waiter that already evaluated the predicate.
				std::lock_guard<std::mutex> lock(mutex);
				cv.notify_all();
			}
			return;
		}

		// Spin until the last thread begins waiting, or until asked to yield.
		// The loads of `yielded` are sequentially consistent on purpose: the re-check above relies on their ordering relative to `step`.
		while (!yielded.load()) {
			if (step.load(std::memory_order_acquire) != s)
				return;
			cpuRelax();
		}

		// Wait on mutex CV
		std::unique_lock<std::mutex> lock(mutex);
		cv.wait(lock, [&] {
			return step.load() != s;
		});
	}

	/** Spin-wait hint for the CPU. No-op on architectures without a known hint instruction.
	*/
	static void cpuRelax() noexcept {
#if defined(__x86_64__) || defined(__i386__)
		__builtin_ia32_pause();
#elif defined(__aarch64__)
		__asm__ __volatile__("yield");
#endif
	}
};

// Call sites inside Engine_relaunchWorkers() (function body only partially visible here) set the thread count with:
//     internal->engineBarrier.setThreads(threadCount);
//     internal->workerBarrier.setThreads(threadCount);
{ void Engine::yieldWorkers() { - internal->workerBarrier.yield = true; + internal->workerBarrier.yield(); }