Browse Source

Allow building on ARM64. Only tested on Mac; dependencies are untested.

tags/v2.2.0
Andrew Belt 2 years ago
parent
commit
08cd572d51
10 changed files with 48 additions and 4 deletions
  1. +3
    -0
      .gitmodules
  2. +3
    -0
      arch.mk
  3. +6
    -1
      compile.mk
  4. +6
    -0
      dep/Makefile
  5. +1
    -0
      dep/sse2neon
  6. +1
    -1
      include/simd/Vector.hpp
  7. +9
    -0
      include/simd/common.hpp
  8. +1
    -1
      include/simd/sse_mathfun.h
  9. +3
    -0
      src/common.cpp
  10. +15
    -1
      src/engine/Engine.cpp

+ 3
- 0
.gitmodules View File

@@ -34,3 +34,6 @@
[submodule "dep/rtmidi"] [submodule "dep/rtmidi"]
path = dep/rtmidi path = dep/rtmidi
url = https://github.com/VCVRack/rtmidi.git url = https://github.com/VCVRack/rtmidi.git
[submodule "dep/sse2neon"]
path = dep/sse2neon
url = https://github.com/DLTcollab/sse2neon.git

+ 3
- 0
arch.mk View File

@@ -3,6 +3,9 @@ MACHINE = $(shell $(CC) -dumpmachine)
ifneq (,$(findstring x86_64-,$(MACHINE))) ifneq (,$(findstring x86_64-,$(MACHINE)))
ARCH_X64 := 1 ARCH_X64 := 1
ARCH_NAME := x64 ARCH_NAME := x64
else ifneq (,$(findstring arm64-,$(MACHINE)))
ARCH_ARM64 := 1
ARCH_NAME := arm64
else else
$(error Could not determine CPU architecture of $(MACHINE). Try hacking around in arch.mk) $(error Could not determine CPU architecture of $(MACHINE). Try hacking around in arch.mk)
endif endif


+ 6
- 1
compile.mk View File

@@ -14,7 +14,7 @@ FLAGS += -MMD -MP
# Debugger symbols. These are removed with `strip`. # Debugger symbols. These are removed with `strip`.
FLAGS += -g FLAGS += -g
# Optimization # Optimization
FLAGS += -O3 -march=nehalem -funsafe-math-optimizations -fno-omit-frame-pointer
FLAGS += -O3 -funsafe-math-optimizations -fno-omit-frame-pointer
# Warnings # Warnings
FLAGS += -Wall -Wextra -Wno-unused-parameter FLAGS += -Wall -Wextra -Wno-unused-parameter
# C++ standard # C++ standard
@@ -23,6 +23,11 @@ CXXFLAGS += -std=c++11
# Architecture-specific flags # Architecture-specific flags
ifdef ARCH_X64 ifdef ARCH_X64
FLAGS += -DARCH_X64 FLAGS += -DARCH_X64
FLAGS += -march=nehalem
endif
ifdef ARCH_ARM64
FLAGS += -DARCH_ARM64
FLAGS += -march=armv8-a+fp+simd
endif endif


ifdef ARCH_LIN ifdef ARCH_LIN


+ 6
- 0
dep/Makefile View File

@@ -55,6 +55,7 @@ osdialog = include/osdialog.h
pffft = include/pffft.h pffft = include/pffft.h
fuzzysearchdatabase = include/FuzzySearchDatabase.hpp fuzzysearchdatabase = include/FuzzySearchDatabase.hpp
ghcfilesystem = include/ghc/filesystem.hpp ghcfilesystem = include/ghc/filesystem.hpp
sse2neon = include/sse2neon/sse2neon.h


DEPS += $(glew) DEPS += $(glew)
DEPS += $(glfw) DEPS += $(glfw)
@@ -72,6 +73,7 @@ DEPS += $(osdialog)
DEPS += $(pffft) DEPS += $(pffft)
DEPS += $(fuzzysearchdatabase) DEPS += $(fuzzysearchdatabase)
DEPS += $(ghcfilesystem) DEPS += $(ghcfilesystem)
DEPS += $(sse2neon)




DEP_LOCAL := . DEP_LOCAL := .
@@ -255,6 +257,10 @@ $(ghcfilesystem): filesystem/include/ghc
mkdir -p include mkdir -p include
cp -r $^ include/ cp -r $^ include/


$(sse2neon): sse2neon/sse2neon.h
mkdir -p include
cp $^ include/

# Helpers # Helpers


src: glew-2.1.0 glfw jansson-2.12 speexdsp-SpeexDSP-1.2rc3 libsamplerate-0.1.9 openssl-1.1.1k curl-7.79.1 zstd-1.4.5 libarchive-3.4.3 rtaudio nanovg nanosvg oui-blendish osdialog src: glew-2.1.0 glfw jansson-2.12 speexdsp-SpeexDSP-1.2rc3 libsamplerate-0.1.9 openssl-1.1.1k curl-7.79.1 zstd-1.4.5 libarchive-3.4.3 rtaudio nanovg nanosvg oui-blendish osdialog


+ 1
- 0
dep/sse2neon

@@ -0,0 +1 @@
Subproject commit 988782cbadf95c2072b4b1b2b8fa0afa81b01c36

+ 1
- 1
include/simd/Vector.hpp View File

@@ -1,6 +1,6 @@
#pragma once #pragma once
#include <cstring> #include <cstring>
#include <pmmintrin.h>
#include "common.hpp"




namespace rack { namespace rack {


+ 9
- 0
include/simd/common.hpp View File

@@ -0,0 +1,9 @@
#pragma once

#if defined ARCH_X64
// Intel intrinsics header
#include <x86intrin.h>
#elif defined ARCH_ARM64
// Translation header for using SSE3 intrinsics on ARM64 NEON
#include <sse2neon.h>
#endif

+ 1
- 1
include/simd/sse_mathfun.h View File

@@ -43,7 +43,7 @@ This derived source file is released under the zlib license.
(this is the zlib license) (this is the zlib license)
*/ */
#pragma once #pragma once
#include <pmmintrin.h>
#include "common.hpp"




/** Generate 1.f without accessing memory */ /** Generate 1.f without accessing memory */


+ 3
- 0
src/common.cpp View File

@@ -30,6 +30,9 @@ const std::string APP_VERSION = TOSTRING(_APP_VERSION);
#if defined ARCH_X64 #if defined ARCH_X64
const std::string APP_ARCH = "x64"; const std::string APP_ARCH = "x64";
#endif #endif
#if defined ARCH_ARM64
const std::string APP_ARCH = "arm64";
#endif
const std::string API_URL = "https://api.vcvrack.com"; const std::string API_URL = "https://api.vcvrack.com";






+ 15
- 1
src/engine/Engine.cpp View File

@@ -5,7 +5,9 @@
#include <mutex> #include <mutex>
#include <atomic> #include <atomic>
#include <tuple> #include <tuple>
#include <pmmintrin.h>
#if defined ARCH_X64
#include <pmmintrin.h>
#endif


#include <engine/Engine.hpp> #include <engine/Engine.hpp>
#include <settings.hpp> #include <settings.hpp>
@@ -21,6 +23,7 @@ namespace rack {
namespace engine { namespace engine {




#if defined ARCH_X64
static void initMXCSR() { static void initMXCSR() {
// Set CPU to flush-to-zero (FTZ) and denormals-are-zero (DAZ) mode // Set CPU to flush-to-zero (FTZ) and denormals-are-zero (DAZ) mode
// https://software.intel.com/en-us/node/682949 // https://software.intel.com/en-us/node/682949
@@ -29,6 +32,7 @@ static void initMXCSR() {
// Reset other flags // Reset other flags
_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
} }
#endif




/** Barrier based on mutexes. /** Barrier based on mutexes.
@@ -92,7 +96,9 @@ struct SpinBarrier {
while (true) { while (true) {
if (step.load(std::memory_order_relaxed) != s) if (step.load(std::memory_order_relaxed) != s)
return; return;
#if defined ARCH_X64
__builtin_ia32_pause(); __builtin_ia32_pause();
#endif
} }
} }
}; };
@@ -139,7 +145,9 @@ struct HybridBarrier {
while (!yielded.load(std::memory_order_relaxed)) { while (!yielded.load(std::memory_order_relaxed)) {
if (step.load(std::memory_order_relaxed) != s) if (step.load(std::memory_order_relaxed) != s)
return; return;
#if defined ARCH_X64
__builtin_ia32_pause(); __builtin_ia32_pause();
#endif
} }


// Wait on mutex CV // Wait on mutex CV
@@ -529,8 +537,10 @@ void Engine::stepBlock(int frames) {
std::lock_guard<std::mutex> stepLock(internal->blockMutex); std::lock_guard<std::mutex> stepLock(internal->blockMutex);
SharedLock<SharedMutex> lock(internal->mutex); SharedLock<SharedMutex> lock(internal->mutex);
// Configure thread // Configure thread
#if defined ARCH_X64
uint32_t csr = _mm_getcsr(); uint32_t csr = _mm_getcsr();
initMXCSR(); initMXCSR();
#endif
random::init(); random::init();


internal->blockFrame = internal->frame; internal->blockFrame = internal->frame;
@@ -573,8 +583,10 @@ void Engine::stepBlock(int frames) {
internal->meterMax = 0.0; internal->meterMax = 0.0;
} }


#if defined ARCH_X64
// Reset MXCSR back to original value // Reset MXCSR back to original value
_mm_setcsr(csr); _mm_setcsr(csr);
#endif
} }




@@ -1299,7 +1311,9 @@ void EngineWorker::run() {
// Configure thread // Configure thread
contextSet(engine->internal->context); contextSet(engine->internal->context);
system::setThreadName(string::f("Worker %d", id)); system::setThreadName(string::f("Worker %d", id));
#if defined ARCH_X64
initMXCSR(); initMXCSR();
#endif
random::init(); random::init();


while (true) { while (true) {


Loading…
Cancel
Save