@@ -34,3 +34,6 @@ | |||||
[submodule "dep/rtmidi"] | [submodule "dep/rtmidi"] | ||||
path = dep/rtmidi | path = dep/rtmidi | ||||
url = https://github.com/VCVRack/rtmidi.git | url = https://github.com/VCVRack/rtmidi.git | ||||
[submodule "dep/sse2neon"] | |||||
path = dep/sse2neon | |||||
url = https://github.com/DLTcollab/sse2neon.git |
@@ -3,6 +3,9 @@ MACHINE = $(shell $(CC) -dumpmachine) | |||||
# Detect the target CPU from the compiler's machine triple ($(CC) -dumpmachine). | |||||
# Sets ARCH_<cpu> := 1 and ARCH_NAME for exactly one supported architecture. | |||||
ifneq (,$(findstring x86_64-,$(MACHINE))) | ifneq (,$(findstring x86_64-,$(MACHINE))) | ||||
ARCH_X64 := 1 | ARCH_X64 := 1 | ||||
ARCH_NAME := x64 | ARCH_NAME := x64 | ||||
# 64-bit ARM is spelled "arm64-" by Apple toolchains and "aarch64-" by GNU/Linux toolchains. | |||||
# The concatenation is non-empty if either spelling is found. | |||||
else ifneq (,$(findstring arm64-,$(MACHINE))$(findstring aarch64-,$(MACHINE))) | |||||
ARCH_ARM64 := 1 | |||||
ARCH_NAME := arm64 | |||||
else | else | ||||
$(error Could not determine CPU architecture of $(MACHINE). Try hacking around in arch.mk) | $(error Could not determine CPU architecture of $(MACHINE). Try hacking around in arch.mk) | ||||
endif | endif | ||||
@@ -14,7 +14,7 @@ FLAGS += -MMD -MP | |||||
# Debugger symbols. These are removed with `strip`. | # Debugger symbols. These are removed with `strip`. | ||||
FLAGS += -g | FLAGS += -g | ||||
# Optimization | # Optimization | ||||
FLAGS += -O3 -march=nehalem -funsafe-math-optimizations -fno-omit-frame-pointer | |||||
FLAGS += -O3 -funsafe-math-optimizations -fno-omit-frame-pointer | |||||
# Warnings | # Warnings | ||||
FLAGS += -Wall -Wextra -Wno-unused-parameter | FLAGS += -Wall -Wextra -Wno-unused-parameter | ||||
# C++ standard | # C++ standard | ||||
@@ -23,6 +23,11 @@ CXXFLAGS += -std=c++11 | |||||
# Architecture-specific flags | # Architecture-specific flags | ||||
ifdef ARCH_X64 | ifdef ARCH_X64 | ||||
FLAGS += -DARCH_X64 | FLAGS += -DARCH_X64 | ||||
FLAGS += -march=nehalem | |||||
endif | |||||
ifdef ARCH_ARM64 | |||||
FLAGS += -DARCH_ARM64 | |||||
FLAGS += -march=armv8-a+fp+simd | |||||
endif | endif | ||||
ifdef ARCH_LIN | ifdef ARCH_LIN | ||||
@@ -55,6 +55,7 @@ osdialog = include/osdialog.h | |||||
pffft = include/pffft.h | pffft = include/pffft.h | ||||
fuzzysearchdatabase = include/FuzzySearchDatabase.hpp | fuzzysearchdatabase = include/FuzzySearchDatabase.hpp | ||||
ghcfilesystem = include/ghc/filesystem.hpp | ghcfilesystem = include/ghc/filesystem.hpp | ||||
# Header-only dependency. The path must be the file its rule actually creates | |||||
# (`cp sse2neon/sse2neon.h include/`), otherwise the target never exists and is rebuilt every run. | |||||
sse2neon = include/sse2neon.h | |||||
DEPS += $(glew) | DEPS += $(glew) | ||||
DEPS += $(glfw) | DEPS += $(glfw) | ||||
@@ -72,6 +73,7 @@ DEPS += $(osdialog) | |||||
DEPS += $(pffft) | DEPS += $(pffft) | ||||
DEPS += $(fuzzysearchdatabase) | DEPS += $(fuzzysearchdatabase) | ||||
DEPS += $(ghcfilesystem) | DEPS += $(ghcfilesystem) | ||||
DEPS += $(sse2neon) | |||||
DEP_LOCAL := . | DEP_LOCAL := . | ||||
@@ -255,6 +257,10 @@ $(ghcfilesystem): filesystem/include/ghc | |||||
mkdir -p include | mkdir -p include | ||||
cp -r $^ include/ | cp -r $^ include/ | ||||
$(sse2neon): sse2neon/sse2neon.h | |||||
mkdir -p include | |||||
cp $^ include/ | |||||
# Helpers | # Helpers | ||||
src: glew-2.1.0 glfw jansson-2.12 speexdsp-SpeexDSP-1.2rc3 libsamplerate-0.1.9 openssl-1.1.1k curl-7.79.1 zstd-1.4.5 libarchive-3.4.3 rtaudio nanovg nanosvg oui-blendish osdialog | src: glew-2.1.0 glfw jansson-2.12 speexdsp-SpeexDSP-1.2rc3 libsamplerate-0.1.9 openssl-1.1.1k curl-7.79.1 zstd-1.4.5 libarchive-3.4.3 rtaudio nanovg nanosvg oui-blendish osdialog | ||||
@@ -0,0 +1 @@ | |||||
Subproject commit 988782cbadf95c2072b4b1b2b8fa0afa81b01c36 |
@@ -1,6 +1,6 @@ | |||||
#pragma once | #pragma once | ||||
#include <cstring> | #include <cstring> | ||||
#include <pmmintrin.h> | |||||
#include "common.hpp" | |||||
namespace rack { | namespace rack { | ||||
@@ -0,0 +1,9 @@ | |||||
#pragma once | |||||
// Pulls in the SIMD intrinsics header matching the ARCH_* macro defined on the compiler command line. | |||||
// If neither ARCH_X64 nor ARCH_ARM64 is defined, this header includes nothing. | |||||
#if defined ARCH_X64 | |||||
// Intel intrinsics header | |||||
#include <x86intrin.h> | |||||
#elif defined ARCH_ARM64 | |||||
// Translation header for using SSE3 intrinsics on ARM64 NEON | |||||
#include <sse2neon.h> | |||||
#endif |
@@ -43,7 +43,7 @@ This derived source file is released under the zlib license. | |||||
(this is the zlib license) | (this is the zlib license) | ||||
*/ | */ | ||||
#pragma once | #pragma once | ||||
#include <pmmintrin.h> | |||||
#include "common.hpp" | |||||
/** Generate 1.f without accessing memory */ | /** Generate 1.f without accessing memory */ | ||||
@@ -30,6 +30,9 @@ const std::string APP_VERSION = TOSTRING(_APP_VERSION); | |||||
#if defined ARCH_X64 | #if defined ARCH_X64 | ||||
const std::string APP_ARCH = "x64"; | const std::string APP_ARCH = "x64"; | ||||
#endif | #endif | ||||
#if defined ARCH_ARM64 | |||||
const std::string APP_ARCH = "arm64"; | |||||
#endif | |||||
const std::string API_URL = "https://api.vcvrack.com"; | const std::string API_URL = "https://api.vcvrack.com"; | ||||
@@ -5,7 +5,9 @@ | |||||
#include <mutex> | #include <mutex> | ||||
#include <atomic> | #include <atomic> | ||||
#include <tuple> | #include <tuple> | ||||
#include <pmmintrin.h> | |||||
#if defined ARCH_X64 | |||||
#include <pmmintrin.h> | |||||
#endif | |||||
#include <engine/Engine.hpp> | #include <engine/Engine.hpp> | ||||
#include <settings.hpp> | #include <settings.hpp> | ||||
@@ -21,6 +23,7 @@ namespace rack { | |||||
namespace engine { | namespace engine { | ||||
#if defined ARCH_X64 | |||||
static void initMXCSR() { | static void initMXCSR() { | ||||
// Set CPU to flush-to-zero (FTZ) and denormals-are-zero (DAZ) mode | // Set CPU to flush-to-zero (FTZ) and denormals-are-zero (DAZ) mode | ||||
// https://software.intel.com/en-us/node/682949 | // https://software.intel.com/en-us/node/682949 | ||||
@@ -29,6 +32,7 @@ static void initMXCSR() { | |||||
// Reset other flags | // Reset other flags | ||||
_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); | _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); | ||||
} | } | ||||
#endif | |||||
/** Barrier based on mutexes. | /** Barrier based on mutexes. | ||||
@@ -92,7 +96,9 @@ struct SpinBarrier { | |||||
while (true) { | while (true) { | ||||
if (step.load(std::memory_order_relaxed) != s) | if (step.load(std::memory_order_relaxed) != s) | ||||
return; | return; | ||||
#if defined ARCH_X64 | |||||
__builtin_ia32_pause(); | __builtin_ia32_pause(); | ||||
#endif | |||||
} | } | ||||
} | } | ||||
}; | }; | ||||
@@ -139,7 +145,9 @@ struct HybridBarrier { | |||||
while (!yielded.load(std::memory_order_relaxed)) { | while (!yielded.load(std::memory_order_relaxed)) { | ||||
if (step.load(std::memory_order_relaxed) != s) | if (step.load(std::memory_order_relaxed) != s) | ||||
return; | return; | ||||
#if defined ARCH_X64 | |||||
__builtin_ia32_pause(); | __builtin_ia32_pause(); | ||||
#endif | |||||
} | } | ||||
// Wait on mutex CV | // Wait on mutex CV | ||||
@@ -529,8 +537,10 @@ void Engine::stepBlock(int frames) { | |||||
std::lock_guard<std::mutex> stepLock(internal->blockMutex); | std::lock_guard<std::mutex> stepLock(internal->blockMutex); | ||||
SharedLock<SharedMutex> lock(internal->mutex); | SharedLock<SharedMutex> lock(internal->mutex); | ||||
// Configure thread | // Configure thread | ||||
#if defined ARCH_X64 | |||||
uint32_t csr = _mm_getcsr(); | uint32_t csr = _mm_getcsr(); | ||||
initMXCSR(); | initMXCSR(); | ||||
#endif | |||||
random::init(); | random::init(); | ||||
internal->blockFrame = internal->frame; | internal->blockFrame = internal->frame; | ||||
@@ -573,8 +583,10 @@ void Engine::stepBlock(int frames) { | |||||
internal->meterMax = 0.0; | internal->meterMax = 0.0; | ||||
} | } | ||||
#if defined ARCH_X64 | |||||
// Reset MXCSR back to original value | // Reset MXCSR back to original value | ||||
_mm_setcsr(csr); | _mm_setcsr(csr); | ||||
#endif | |||||
} | } | ||||
@@ -1299,7 +1311,9 @@ void EngineWorker::run() { | |||||
// Configure thread | // Configure thread | ||||
contextSet(engine->internal->context); | contextSet(engine->internal->context); | ||||
system::setThreadName(string::f("Worker %d", id)); | system::setThreadName(string::f("Worker %d", id)); | ||||
#if defined ARCH_X64 | |||||
initMXCSR(); | initMXCSR(); | ||||
#endif | |||||
random::init(); | random::init(); | ||||
while (true) { | while (true) { | ||||