diff --git a/adapters/standalone.cpp b/adapters/standalone.cpp
index 7aa0ba95..6542cd85 100644
--- a/adapters/standalone.cpp
+++ b/adapters/standalone.cpp
@@ -129,7 +129,7 @@ int main(int argc, char* argv[]) {
 
 	// Initialize environment
 	system::init();
-	system::initCpuFlags();
+	system::resetFpuFlags();
 	asset::init();
 	if (!settings::devMode) {
 		logger::logPath = asset::user("log.txt");
diff --git a/include/system.hpp b/include/system.hpp
index 8afa5d2e..16172c91 100644
--- a/include/system.hpp
+++ b/include/system.hpp
@@ -195,10 +195,22 @@ The launched process will continue running if the current process is closed.
 */
 void runProcessDetached(const std::string& path);
 
-PRIVATE void init();
-/** Sets Rack-recommended CPU flags for the current thread.
+/** Returns the floating point unit control flags of the current thread.
+Reads the MXCSR register on x64, and the FPCR register on ARM64.
+Returns 0 on other architectures.
+*/
+uint32_t getFpuFlags();
+/** Sets the floating point unit control flags of the current thread.
+Writes the MXCSR register on x64, and the FPCR register on ARM64.
+Does nothing on other architectures.
+*/
+void setFpuFlags(uint32_t flags);
+/** Sets Rack-recommended FPU flags for the current thread.
+Enables flush-to-zero (plus denormals-are-zero on x64) with round-to-nearest and all exceptions masked.
 */
-void initCpuFlags();
+void resetFpuFlags();
+
+PRIVATE void init();
 
 
 } // namespace system
diff --git a/src/engine/Engine.cpp b/src/engine/Engine.cpp
index 31e15092..a1455671 100644
--- a/src/engine/Engine.cpp
+++ b/src/engine/Engine.cpp
@@ -476,7 +476,7 @@ void Engine::stepBlock(int frames) {
 	std::lock_guard stepLock(internal->blockMutex);
 	SharedLock lock(internal->mutex);
 	// Configure thread
-	system::initCpuFlags();
+	system::resetFpuFlags();
 	random::init();
 
 	internal->blockFrame = internal->frame;
@@ -1267,7 +1267,7 @@ void EngineWorker::run() {
 	// Configure thread
 	contextSet(engine->internal->context);
 	system::setThreadName(string::f("Worker %d", id));
-	system::initCpuFlags();
+	system::resetFpuFlags();
 	random::init();
 
 	while (true) {
diff --git a/src/system.cpp b/src/system.cpp
index 0411580f..7c889c46 100644
--- a/src/system.cpp
+++ b/src/system.cpp
@@ -39,7 +39,6 @@
 
 #include
 #include
-#include
 
 
 /*
@@ -952,23 +951,55 @@ void runProcessDetached(const std::string& path) {
 }
 
 
-void init() {
-	initTime();
+uint32_t getFpuFlags() {
+#if defined ARCH_X64
+	return _mm_getcsr();
+#elif defined ARCH_ARM64
+	uint64_t fpcr;
+	__asm__ volatile("mrs %0, fpcr" : "=r" (fpcr));
+	return static_cast<uint32_t>(fpcr);
+#else
+	// No FPU control register is read on other architectures
+	return 0;
+#endif
+}
+
+void setFpuFlags(uint32_t flags) {
+#if defined ARCH_X64
+	_mm_setcsr(flags);
+#elif defined ARCH_ARM64
+	uint64_t fpcr = flags;
+	__asm__ volatile("msr fpcr, %0" :: "r" (fpcr));
+#endif
 }
 
+void resetFpuFlags() {
+	uint32_t flags = 0;
 
-void initCpuFlags() {
+#if defined ARCH_X64
 	// Set CPU to flush-to-zero (FTZ) and denormals-are-zero (DAZ) mode
 	// https://software.intel.com/en-us/node/682949
-	// On ARM64, this is a SIMDe function.
-	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+	// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+	flags |= 0x8000;
+	// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+	flags |= 0x0040;
+	// Keep all exceptions masked (bits 7-12), as in the power-on MXCSR value 0x1f80.
+	// Clearing these bits would unmask them, so ordinary float math could trap (e.g. SIGFPE).
+	flags |= 0x1f80;
+	// Round-to-nearest is default
+#elif defined ARCH_ARM64
+	// Set Flush-to-Zero
+	flags |= 1 << 24;
 	// ARM64 always uses DAZ
-#if defined ARCH_X64
-	_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+	// Round-to-nearest is default
 #endif
-	// Reset rounding mode to default (nearest)
-	std::fesetround(FE_TONEAREST);
+	setFpuFlags(flags);
+}
+
+
+void init() {
+	initTime();
 }