| @@ -84,88 +84,122 @@ struct WriteLock { | |||
| }; | |||
/** Barrier based on a mutex and condition variable.
Not finished or tested, do not use.

Every participating thread calls wait(), which returns once `threads` threads have arrived.
The barrier is reusable: each completed phase increments `step`, and waiters sleep until `step` differs from the value they saw on entry, so a spurious wakeup cannot release a thread early.
*/
struct Barrier {
	/** Number of threads that have arrived in the current phase. Accessed only while holding `mutex`. */
	int count = 0;
	/** Phase counter, incremented each time the barrier releases. Accessed only while holding `mutex`.
	It is only compared for inequality, so wrapping around is harmless.
	*/
	uint8_t step = 0;
	/** Number of threads that must arrive before the barrier releases. */
	int threads = 0;

	std::mutex mutex;
	std::condition_variable cv;

	/** Sets the number of participating threads.
	Writes `threads` without taking `mutex`, so it must be called when no threads are calling wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Blocks until `threads` threads have called wait() for the current phase.
	Returns immediately if `threads` is 1 or less.
	*/
	void wait() {
		std::unique_lock<std::mutex> lock(mutex);
		// Remember the phase we arrived in. The last thread to arrive advances it.
		const uint8_t s = step;
		if (++count >= threads) {
			// We're the last thread. Reset for the next phase.
			count = 0;
			// Allow other threads to exit wait()
			step++;
			cv.notify_all();
			return;
		}

		// Sleep until the phase changes. The predicate guards against spurious wakeups.
		cv.wait(lock, [&] {
			return step != s;
		});
	}
};
/** 2-phase barrier based on spin-locking.

Every participating thread calls wait(), which busy-waits until `threads` threads have arrived.
Each completed phase increments `step`; waiters spin until `step` differs from the value they read on entry, so the barrier can be reused immediately for the next phase.
*/
struct SpinBarrier {
	/** Number of threads that have arrived in the current phase. */
	std::atomic<int> count{0};
	/** Phase counter, incremented by the last thread to arrive.
	It is only compared for inequality, so wrapping around is harmless.
	*/
	std::atomic<uint8_t> step{0};
	/** Number of threads that must arrive before the barrier releases. */
	int threads = 0;

	/** Must be called when no threads are calling wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Hints to the CPU that the caller is busy-waiting.
	The builtin only exists on x86 targets; elsewhere this is a no-op.
	*/
	static void spinPause() {
#if defined(__i386__) || defined(__x86_64__)
		__builtin_ia32_pause();
#endif
	}

	/** Spins until `threads` threads have called wait() for the current phase.
	Returns immediately if `threads` is 1 or less.
	*/
	void wait() {
		// The phase must be read before announcing our arrival, otherwise the last thread could advance it first.
		const uint8_t s = step.load();
		// acq_rel: release publishes this thread's earlier writes, acquire lets the last thread see everyone else's.
		if (count.fetch_add(1, std::memory_order_acq_rel) + 1 >= threads) {
			// We're the last thread. Reset for the next phase.
			count = 0;
			// Allow other threads to exit wait()
			step++;
			return;
		}

		// Spin until the last thread arrives and advances the phase.
		// The acquire load pairs with `step++` so the reset of `count` and all pre-barrier writes are visible on return.
		while (step.load(std::memory_order_acquire) == s) {
			spinPause();
		}
	}
};
/** Barrier that spin-locks until yield() is called, and then all threads switch to a mutex.
yield() should be called if it is likely that all threads will block for a while and continuing to spin-lock is unnecessary.
Saves CPU power after yield is called.

Each completed phase increments `step` and clears the yield request, so the next phase starts out spinning again.
*/
struct HybridBarrier {
	/** Number of threads that have arrived in the current phase. */
	std::atomic<int> count{0};
	/** Phase counter, incremented by the last thread to arrive.
	It is only compared for inequality, so wrapping around is harmless.
	*/
	std::atomic<uint8_t> step{0};
	/** Number of threads that must arrive before the barrier releases. */
	int threads = 0;

	/** True once yield() has been requested for the current phase. */
	std::atomic<bool> yielded{false};
	/** Number of threads that have left the spin loop and are (about to be) waiting on `cv`. */
	std::atomic<int> sleepers{0};
	std::mutex mutex;
	std::condition_variable cv;

	/** Sets the number of participating threads.
	Writes `threads` without synchronization, so it must be called when no threads are calling wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Requests that waiting threads stop spinning and wait on the mutex instead. */
	void yield() {
		yielded = true;
	}

	/** Hints to the CPU that the caller is busy-waiting.
	The builtin only exists on x86 targets; elsewhere this is a no-op.
	*/
	static void spinPause() {
#if defined(__i386__) || defined(__x86_64__)
		__builtin_ia32_pause();
#endif
	}

	/** Waits until `threads` threads have called wait() for the current phase.
	Spins while no yield has been requested, otherwise sleeps on the condition variable.
	Returns immediately if `threads` is 1 or less.
	*/
	void wait() {
		// The phase must be read before announcing our arrival, otherwise the last thread could advance it first.
		const uint8_t s = step.load();
		// acq_rel: release publishes this thread's earlier writes, acquire lets the last thread see everyone else's.
		if (count.fetch_add(1, std::memory_order_acq_rel) + 1 >= threads) {
			// We're the last thread. Reset for the next phase.
			count = 0;
			// Clear the yield request before releasing anyone, so a yield() made in the next phase is not lost.
			yielded = false;
			// Allow other threads to exit wait()
			step++;
			// Wake sleepers. A waiter increments `sleepers` before checking `step` and we advance `step` before
			// checking `sleepers`, so either the waiter sees the new phase or we see the waiter.
			// Gating the notify on the yield flag instead can miss a yield() that races with the reset above.
			if (sleepers.load() > 0) {
				std::lock_guard<std::mutex> lock(mutex);
				cv.notify_all();
			}
			return;
		}

		// Spin until the last thread arrives, or until a yield is requested.
		while (!yielded.load(std::memory_order_relaxed)) {
			// The acquire load pairs with `step++` so all pre-barrier writes are visible on return.
			if (step.load(std::memory_order_acquire) != s)
				return;
			spinPause();
		}

		// Wait on mutex CV
		sleepers++;
		{
			std::unique_lock<std::mutex> lock(mutex);
			cv.wait(lock, [&] {
				return step != s;
			});
		}
		sleepers--;
	}
};
| @@ -296,8 +330,8 @@ static void Engine_relaunchWorkers(Engine* that, int threadCount) { | |||
| internal->threadCount = threadCount; | |||
| // Set barrier counts | |||
| internal->engineBarrier.total = threadCount; | |||
| internal->workerBarrier.total = threadCount; | |||
| internal->engineBarrier.setThreads(threadCount); | |||
| internal->workerBarrier.setThreads(threadCount); | |||
| if (threadCount > 0) { | |||
| // Create and start engine workers | |||
| @@ -641,7 +675,7 @@ float Engine::getSampleTime() { | |||
| void Engine::yieldWorkers() { | |||
| internal->workerBarrier.yield = true; | |||
| internal->workerBarrier.yield(); | |||
| } | |||