Browse Source

Allow building on ARM64. Tested only on Mac; building the dependencies has not been tested.

tags/v2.2.0
Andrew Belt 2 years ago
parent
commit
08cd572d51
10 changed files with 48 additions and 4 deletions
  1. +3
    -0
      .gitmodules
  2. +3
    -0
      arch.mk
  3. +6
    -1
      compile.mk
  4. +6
    -0
      dep/Makefile
  5. +1
    -0
      dep/sse2neon
  6. +1
    -1
      include/simd/Vector.hpp
  7. +9
    -0
      include/simd/common.hpp
  8. +1
    -1
      include/simd/sse_mathfun.h
  9. +3
    -0
      src/common.cpp
  10. +15
    -1
      src/engine/Engine.cpp

+ 3
- 0
.gitmodules View File

@@ -34,3 +34,6 @@
[submodule "dep/rtmidi"]
path = dep/rtmidi
url = https://github.com/VCVRack/rtmidi.git
[submodule "dep/sse2neon"]
path = dep/sse2neon
url = https://github.com/DLTcollab/sse2neon.git

+ 3
- 0
arch.mk View File

@@ -3,6 +3,9 @@ MACHINE = $(shell $(CC) -dumpmachine)
# Map the compiler's target triple ($(MACHINE)) to an ARCH_* flag and ARCH_NAME.
ifneq (,$(findstring x86_64-,$(MACHINE)))
ARCH_X64 := 1
ARCH_NAME := x64
else ifneq (,$(findstring arm64-,$(MACHINE)))
# Apple toolchains report 64-bit ARM as `arm64-...`
ARCH_ARM64 := 1
ARCH_NAME := arm64
else ifneq (,$(findstring aarch64-,$(MACHINE)))
# GNU toolchains report the same architecture as `aarch64-...`
ARCH_ARM64 := 1
ARCH_NAME := arm64
else
$(error Could not determine CPU architecture of $(MACHINE). Try hacking around in arch.mk)
endif


+ 6
- 1
compile.mk View File

@@ -14,7 +14,7 @@ FLAGS += -MMD -MP
# Debugger symbols. These are removed with `strip`.
FLAGS += -g
# Optimization
FLAGS += -O3 -march=nehalem -funsafe-math-optimizations -fno-omit-frame-pointer
FLAGS += -O3 -funsafe-math-optimizations -fno-omit-frame-pointer
# Warnings
FLAGS += -Wall -Wextra -Wno-unused-parameter
# C++ standard
@@ -23,6 +23,11 @@ CXXFLAGS += -std=c++11
# Architecture-specific flags
ifdef ARCH_X64
FLAGS += -DARCH_X64
FLAGS += -march=nehalem
endif
ifdef ARCH_ARM64
FLAGS += -DARCH_ARM64
FLAGS += -march=armv8-a+fp+simd
endif

ifdef ARCH_LIN


+ 6
- 0
dep/Makefile View File

@@ -55,6 +55,7 @@ osdialog = include/osdialog.h
pffft = include/pffft.h
fuzzysearchdatabase = include/FuzzySearchDatabase.hpp
ghcfilesystem = include/ghc/filesystem.hpp
# Must name the file the sse2neon rule actually produces (it copies the header
# directly into include/), otherwise the target never exists and always rebuilds.
sse2neon = include/sse2neon.h

DEPS += $(glew)
DEPS += $(glfw)
@@ -72,6 +73,7 @@ DEPS += $(osdialog)
DEPS += $(pffft)
DEPS += $(fuzzysearchdatabase)
DEPS += $(ghcfilesystem)
DEPS += $(sse2neon)


DEP_LOCAL := .
@@ -255,6 +257,10 @@ $(ghcfilesystem): filesystem/include/ghc
mkdir -p include
cp -r $^ include/

$(sse2neon): sse2neon/sse2neon.h
mkdir -p include
cp $^ include/

# Helpers

src: glew-2.1.0 glfw jansson-2.12 speexdsp-SpeexDSP-1.2rc3 libsamplerate-0.1.9 openssl-1.1.1k curl-7.79.1 zstd-1.4.5 libarchive-3.4.3 rtaudio nanovg nanosvg oui-blendish osdialog


+ 1
- 0
dep/sse2neon

@@ -0,0 +1 @@
Subproject commit 988782cbadf95c2072b4b1b2b8fa0afa81b01c36

+ 1
- 1
include/simd/Vector.hpp View File

@@ -1,6 +1,6 @@
#pragma once
#include <cstring>
#include <pmmintrin.h>
#include "common.hpp"


namespace rack {


+ 9
- 0
include/simd/common.hpp View File

@@ -0,0 +1,9 @@
#pragma once
// Pulls in the SIMD intrinsics header that matches the target architecture.
// The ARCH_* macros are expected to be supplied by the build system (-DARCH_X64 / -DARCH_ARM64).
// If neither macro is defined, no intrinsics header is included.

#if defined ARCH_X64
// Intel intrinsics header
#include <x86intrin.h>
#elif defined ARCH_ARM64
// Translation header for using SSE3 intrinsics on ARM64 NEON
#include <sse2neon.h>
#endif

+ 1
- 1
include/simd/sse_mathfun.h View File

@@ -43,7 +43,7 @@ This derived source file is released under the zlib license.
(this is the zlib license)
*/
#pragma once
#include <pmmintrin.h>
#include "common.hpp"


/** Generate 1.f without accessing memory */


+ 3
- 0
src/common.cpp View File

@@ -30,6 +30,9 @@ const std::string APP_VERSION = TOSTRING(_APP_VERSION);
#if defined ARCH_X64
const std::string APP_ARCH = "x64";
#endif
#if defined ARCH_ARM64
const std::string APP_ARCH = "arm64";
#endif
const std::string API_URL = "https://api.vcvrack.com";




+ 15
- 1
src/engine/Engine.cpp View File

@@ -5,7 +5,9 @@
#include <mutex>
#include <atomic>
#include <tuple>
#include <pmmintrin.h>
#if defined ARCH_X64
#include <pmmintrin.h>
#endif

#include <engine/Engine.hpp>
#include <settings.hpp>
@@ -21,6 +23,7 @@ namespace rack {
namespace engine {


#if defined ARCH_X64
static void initMXCSR() {
// Set CPU to flush-to-zero (FTZ) and denormals-are-zero (DAZ) mode
// https://software.intel.com/en-us/node/682949
@@ -29,6 +32,7 @@ static void initMXCSR() {
// Reset other flags
_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
}
#endif


/** Barrier based on mutexes.
@@ -92,7 +96,9 @@ struct SpinBarrier {
while (true) {
if (step.load(std::memory_order_relaxed) != s)
return;
#if defined ARCH_X64
__builtin_ia32_pause();
#endif
}
}
};
@@ -139,7 +145,9 @@ struct HybridBarrier {
while (!yielded.load(std::memory_order_relaxed)) {
if (step.load(std::memory_order_relaxed) != s)
return;
#if defined ARCH_X64
__builtin_ia32_pause();
#endif
}

// Wait on mutex CV
@@ -529,8 +537,10 @@ void Engine::stepBlock(int frames) {
std::lock_guard<std::mutex> stepLock(internal->blockMutex);
SharedLock<SharedMutex> lock(internal->mutex);
// Configure thread
#if defined ARCH_X64
uint32_t csr = _mm_getcsr();
initMXCSR();
#endif
random::init();

internal->blockFrame = internal->frame;
@@ -573,8 +583,10 @@ void Engine::stepBlock(int frames) {
internal->meterMax = 0.0;
}

#if defined ARCH_X64
// Reset MXCSR back to original value
_mm_setcsr(csr);
#endif
}


@@ -1299,7 +1311,9 @@ void EngineWorker::run() {
// Configure thread
contextSet(engine->internal->context);
system::setThreadName(string::f("Worker %d", id));
#if defined ARCH_X64
initMXCSR();
#endif
random::init();

while (true) {


Loading…
Cancel
Save