| @@ -84,88 +84,122 @@ struct WriteLock { | |||||
| }; | }; | ||||
/** Barrier based on mutexes.
Not finished or tested, do not use.
*/
struct Barrier {
	// Number of threads currently blocked in wait()
	int count = 0;
	// Phase generation counter. Incremented by the last arriving thread;
	// waiters wake when it differs from the value they captured on entry.
	// Wrap-around of uint8_t is harmless since only inequality is tested.
	uint8_t step = 0;
	// Number of threads that must arrive before the barrier releases
	int threads = 0;

	std::mutex mutex;
	std::condition_variable cv;

	/** Sets the number of participating threads.
	Must not be called while any thread is inside wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Blocks until `threads` threads have called wait(), then releases all of them.
	*/
	void wait() {
		std::unique_lock<std::mutex> lock(mutex);
		uint8_t s = step;
		if (++count >= threads) {
			// We're the last thread. Reset next phase.
			count = 0;
			// Allow other threads to exit wait()
			step++;
			cv.notify_all();
			return;
		}

		// Predicate guards against spurious wakeups and against missing a
		// notify: the phase counter changes exactly once per release.
		cv.wait(lock, [&] {
			return step != s;
		});
	}
};
/** 2-phase barrier based on spin-locking.
*/
struct SpinBarrier {
	// Number of threads that have arrived in the current phase
	std::atomic<int> count{0};
	// Phase generation counter; waiters spin until it changes.
	// uint8_t wrap-around is fine — only inequality is checked.
	std::atomic<uint8_t> step{0};
	int threads = 0;

	/** Must be called when no threads are calling wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Blocks (spinning) until `threads` threads have called wait().
	*/
	void wait() {
		uint8_t s = step;
		// acquire pairs with the other threads' arrival so the last thread
		// observes a fully-arrived phase before resetting it.
		if (count.fetch_add(1, std::memory_order_acquire) + 1 >= threads) {
			// We're the last thread. Reset next phase.
			count = 0;
			// Allow other threads to exit wait()
			step++;
			return;
		}

		// Spin until the last thread begins waiting
		while (true) {
			if (step.load(std::memory_order_relaxed) != s)
				return;
			// x86 PAUSE: reduces power and avoids memory-order penalties in
			// the spin loop. NOTE(review): gcc/clang x86-only builtin —
			// confirm target platforms before porting.
			__builtin_ia32_pause();
		}
	}
};
/** Barrier that spin-locks until yield() is called, and then all threads switch to waiting on a mutex instead.
yield() should be called if it is likely that all threads will block for a while and continuing to spin-lock is unnecessary.
Saves CPU power after yield is called.
*/
struct HybridBarrier {
	// Number of threads that have arrived in the current phase
	std::atomic<int> count{0};
	// Phase generation counter; waiters exit when it changes.
	// uint8_t wrap-around is fine — only inequality is checked.
	std::atomic<uint8_t> step{0};
	int threads = 0;

	// When set, spinning waiters fall back to blocking on the condvar.
	// Reset by the last arriving thread at the end of each phase.
	std::atomic<bool> yielded{false};

	std::mutex mutex;
	std::condition_variable cv;

	/** Must be called when no threads are calling wait().
	*/
	void setThreads(int threads) {
		this->threads = threads;
	}

	/** Requests that spinning waiters switch to blocking on the mutex.
	May be called from any thread at any time.
	*/
	void yield() {
		yielded = true;
	}

	/** Blocks until `threads` threads have called wait().
	Spins until yield() is observed, then sleeps on the condition variable.
	*/
	void wait() {
		uint8_t s = step;
		// acquire pairs with the other threads' arrival so the last thread
		// observes a fully-arrived phase before resetting it.
		if (count.fetch_add(1, std::memory_order_acquire) + 1 >= threads) {
			// We're the last thread. Reset next phase.
			count = 0;
			bool wasYielded = yielded;
			yielded = false;
			// Allow other threads to exit wait()
			step++;
			if (wasYielded) {
				// Take the lock before notifying so a waiter cannot check the
				// predicate and sleep between our step++ and notify_all().
				std::unique_lock<std::mutex> lock(mutex);
				cv.notify_all();
			}
			return;
		}

		// Spin until the last thread begins waiting
		while (!yielded.load(std::memory_order_relaxed)) {
			if (step.load(std::memory_order_relaxed) != s)
				return;
			// x86 PAUSE: reduces power in the spin loop. NOTE(review):
			// gcc/clang x86-only builtin — confirm target platforms.
			__builtin_ia32_pause();
		}

		// Wait on mutex CV
		std::unique_lock<std::mutex> lock(mutex);
		cv.wait(lock, [&] {
			return step != s;
		});
	}
};
| @@ -296,8 +330,8 @@ static void Engine_relaunchWorkers(Engine* that, int threadCount) { | |||||
| internal->threadCount = threadCount; | internal->threadCount = threadCount; | ||||
| // Set barrier counts | // Set barrier counts | ||||
| internal->engineBarrier.total = threadCount; | |||||
| internal->workerBarrier.total = threadCount; | |||||
| internal->engineBarrier.setThreads(threadCount); | |||||
| internal->workerBarrier.setThreads(threadCount); | |||||
| if (threadCount > 0) { | if (threadCount > 0) { | ||||
| // Create and start engine workers | // Create and start engine workers | ||||
| @@ -641,7 +675,7 @@ float Engine::getSampleTime() { | |||||
| void Engine::yieldWorkers() { | void Engine::yieldWorkers() { | ||||
| internal->workerBarrier.yield = true; | |||||
| internal->workerBarrier.yield(); | |||||
| } | } | ||||