From e155450ccb58df16557193ef30191cff996e92d9 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Wed, 13 Feb 2019 09:23:32 -0500
Subject: [PATCH] Use on-demand module assignment for engine worker threads
 instead of fixed strides.

---
 src/app/ModuleBrowser.cpp |  1 -
 src/app/Toolbar.cpp       |  4 ++--
 src/engine/Engine.cpp     | 38 +++++++++++++++++++-------------------
 3 files changed, 21 insertions(+), 22 deletions(-)
diff --git a/src/app/ModuleBrowser.cpp b/src/app/ModuleBrowser.cpp
index d0a515c6..7612a55b 100644
--- a/src/app/ModuleBrowser.cpp
+++ b/src/app/ModuleBrowser.cpp
@@ -26,7 +26,6 @@ namespace app {
 static std::set<plugin::Model*> sFavoriteModels;
 
 
-
 bool isMatch(const std::string &s, const std::string &search) {
 	std::string s2 = string::lowercase(s);
 	std::string search2 = string::lowercase(search);
diff --git a/src/app/Toolbar.cpp b/src/app/Toolbar.cpp
index ae5e38a0..bf0db5a5 100644
--- a/src/app/Toolbar.cpp
+++ b/src/app/Toolbar.cpp
@@ -317,9 +317,9 @@ struct ThreadCountValueItem : ui::MenuItem {
 		this->threadCount = threadCount;
 		text = string::f("%d", threadCount);
 		if (threadCount == system::getLogicalCoreCount() / 2)
-			text += " (best performance)";
+			text += " (most modules)";
 		else if (threadCount == 1)
-			text += " (best efficiency)";
+			text += " (lowest CPU usage)";
 		rightText = CHECKMARK(APP->engine->getThreadCount() == threadCount);
 	}
 	void onAction(const event::Action &e) override {
diff --git a/src/engine/Engine.cpp b/src/engine/Engine.cpp
index 74e7dc1c..85947385 100644
--- a/src/engine/Engine.cpp
+++ b/src/engine/Engine.cpp
@@ -60,33 +60,29 @@ struct Barrier {
 		if (total <= 1)
 			return;
 		std::unique_lock<std::mutex> lock(mutex);
-		count++;
-		if (count < total) {
-			cv.wait(lock);
-		}
-		else {
+		int id = ++count;
+		if (id == total) {
 			count = 0;
 			cv.notify_all();
 		}
+		else {
+			cv.wait(lock);
+		}
 	}
 };
 
 
 struct SpinBarrier {
-	std::atomic<int> count;
+	std::atomic<int> count{0};
 	int total = 0;
 
-	SpinBarrier() {
-		count = 0;
-	}
-
 	void wait() {
-		count++;
-		if (count < total) {
-			while (count > 0) {}
+		int id = ++count;
+		if (id == total) {
+			count = 0;
 		}
 		else {
-			count = 0;
+			while (count != 0);
 		}
 	}
 };
@@ -150,6 +146,7 @@ struct Engine::Internal {
 	std::vector<EngineWorker> workers;
 	SpinBarrier engineBarrier;
 	SpinBarrier workerBarrier;
+	std::atomic<int> workerModuleIndex;
 };
 
 
@@ -185,12 +182,14 @@ static void Engine_stepModules(Engine *engine, int threadId) {
 	int threadCount = internal->threadCount;
 	int modulesLen = internal->modules.size();
 
-	// TODO
-	// There's room for optimization here by choosing modules intelligently rather than fixed strides.
-	// See OpenMP's `guided` scheduling algorithm.
-
 	// Step each module
-	for (int i = threadId; i < modulesLen; i += threadCount) {
+	// for (int i = threadId; i < modulesLen; i += threadCount) {
+	while (true) {
+		// Choose module
+		int i = internal->workerModuleIndex++;
+		if (i >= modulesLen)
+			break;
+
 		Module *module = internal->modules[i];
 		if (!module->bypass) {
 			// Step module
@@ -245,6 +244,7 @@ static void Engine_step(Engine *engine) {
 	}
 
 	// Step modules along with workers
+	internal->workerModuleIndex = 0;
 	internal->engineBarrier.wait();
 	Engine_stepModules(engine, 0);
 	internal->workerBarrier.wait();