From f59a5dfc7fe27a94f54809be1b1c9b6561f34fc8 Mon Sep 17 00:00:00 2001 From: hogliux Date: Mon, 2 Oct 2017 18:23:51 +0100 Subject: [PATCH] Fixed an issue where ScopedNoDenormals would do nothing on all platforms and added arm implementation --- .../buffers/juce_FloatVectorOperations.cpp | 84 ++++++++++++++++++- .../buffers/juce_FloatVectorOperations.h | 30 +++---- .../juce_audio_basics/juce_audio_basics.cpp | 23 ----- modules/juce_audio_basics/juce_audio_basics.h | 25 ++++++ .../juce_core/system/juce_TargetPlatform.h | 2 +- 5 files changed, 119 insertions(+), 45 deletions(-) diff --git a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp index 5523a4a2f7..9c3316c3d5 100644 --- a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp +++ b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp @@ -1028,20 +1028,98 @@ double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int #endif } +intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept +{ + intptr_t fpsr = 0; + #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS + fpsr = static_cast<intptr_t> (_mm_getcsr()); + #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON + #if defined (__arm64__) || defined (__aarch64__) + asm volatile("mrs %0, fpcr" : "=r" (fpsr)); + #elif JUCE_USE_ARM_NEON + asm volatile("vmrs %0, fpscr" : "=r" (fpsr)); + #endif + #else + #if ! 
(defined (JUCE_INTEL) || defined (JUCE_ARM)) + jassertfalse; // No support for getting the floating point status register for your platform + #endif + #endif + + return fpsr; +} + +void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister (intptr_t fpsr) noexcept +{ + #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS + auto fpsr_w = static_cast<uint32_t> (fpsr); + _mm_setcsr (fpsr_w); + #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON + #if defined (__arm64__) || defined (__aarch64__) + asm volatile("msr fpcr, %0" : : "ri" (fpsr)); + #elif JUCE_USE_ARM_NEON + asm volatile("vmsr fpscr, %0" : : "ri" (fpsr)); + #endif + #else + #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM)) + jassertfalse; // No support for getting the floating point status register for your platform + #endif + ignoreUnused (fpsr); + #endif +} + void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept { + #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__)) #if JUCE_USE_SSE_INTRINSICS - _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF); + intptr_t mask = _MM_FLUSH_ZERO_MASK; + #else /*JUCE_USE_ARM_NEON*/ + intptr_t mask = (1 << 24 /* FZ */); + #endif + setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0)); + #else + #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM)) + jassertfalse; // No support for flush to zero mode on your platform #endif ignoreUnused (shouldEnable); + #endif } void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept { + #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__)) + #if JUCE_USE_SSE_INTRINSICS + intptr_t mask = 0x8040; + #else /*JUCE_USE_ARM_NEON*/ + intptr_t mask = (1 << 24 /* FZ */) | (1 << 23 /* RMODE_1 */) | (1 << 22 /* RMODE_0 */); + #endif + + setFpStatusRegister (getFpStatusRegister() | mask); + #else + #if ! 
(defined (JUCE_INTEL) || defined (JUCE_ARM)) + jassertfalse; // No support for disable denormals mode on your platform + #endif + #endif +} + +ScopedNoDenormals::ScopedNoDenormals() noexcept +{ + #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__)) #if JUCE_USE_SSE_INTRINSICS - const unsigned int mxcsr = _mm_getcsr(); - _mm_setcsr (mxcsr | 0x8040); // add the DAZ and FZ bits + intptr_t mask = 0x8040; + #else /*JUCE_USE_ARM_NEON*/ + intptr_t mask = (1 << 24 /* FZ */) | (1 << 23 /* RMODE_1 */) | (1 << 22 /* RMODE_0 */); #endif + + fpsr = FloatVectorOperations::getFpStatusRegister(); + FloatVectorOperations::setFpStatusRegister (fpsr | mask); + #endif +} + +ScopedNoDenormals::~ScopedNoDenormals() noexcept +{ + #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__)) + FloatVectorOperations::setFpStatusRegister (fpsr); + #endif } //============================================================================== diff --git a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h index c44bbd800f..ca5d734691 100644 --- a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h +++ b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h @@ -28,6 +28,7 @@ namespace juce #else #define JUCE_SNAP_TO_ZERO(n) ignoreUnused (n) #endif +class ScopedNoDenormals; //============================================================================== /** @@ -219,6 +220,12 @@ public: call before audio processing code where you really want to avoid denormalisation performance hits. 
*/ static void JUCE_CALLTYPE disableDenormalisedNumberSupport() noexcept; + +private: + friend ScopedNoDenormals; + + static intptr_t JUCE_CALLTYPE getFpStatusRegister() noexcept; + static void JUCE_CALLTYPE setFpStatusRegister (intptr_t) noexcept; }; //============================================================================== @@ -229,26 +236,13 @@ public: class ScopedNoDenormals { public: - inline ScopedNoDenormals() noexcept - { - #if JUCE_USE_SSE_INTRINSICS - mxcsr = _mm_getcsr(); - _mm_setcsr (mxcsr | 0x8040); // add the DAZ and FZ bits - #endif - } - - - inline ~ScopedNoDenormals() noexcept - { - #if JUCE_USE_SSE_INTRINSICS - _mm_setcsr (mxcsr); - #endif - } + ScopedNoDenormals() noexcept; + ~ScopedNoDenormals() noexcept; private: - #if JUCE_USE_SSE_INTRINSICS - unsigned int mxcsr; - #endif + #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__)) + intptr_t fpsr; + #endif }; } // namespace juce diff --git a/modules/juce_audio_basics/juce_audio_basics.cpp b/modules/juce_audio_basics/juce_audio_basics.cpp index 09e13e49c8..60b83c502a 100644 --- a/modules/juce_audio_basics/juce_audio_basics.cpp +++ b/modules/juce_audio_basics/juce_audio_basics.cpp @@ -31,22 +31,10 @@ #include "juce_audio_basics.h" -#if JUCE_MINGW && ! defined (__SSE2__) - #define JUCE_USE_SSE_INTRINSICS 0 -#endif - #if JUCE_MINGW && ! defined (alloca) #define alloca __builtin_alloca #endif -#ifndef JUCE_USE_SSE_INTRINSICS - #define JUCE_USE_SSE_INTRINSICS 1 -#endif - -#if ! JUCE_INTEL - #undef JUCE_USE_SSE_INTRINSICS -#endif - #if JUCE_USE_SSE_INTRINSICS #include <emmintrin.h> #endif @@ -61,17 +49,6 @@ #undef JUCE_USE_VDSP_FRAMEWORK #endif -#if __ARM_NEON__ && ! 
(JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON)) - #define JUCE_USE_ARM_NEON 1 -#endif - -#if TARGET_IPHONE_SIMULATOR - #ifdef JUCE_USE_ARM_NEON - #undef JUCE_USE_ARM_NEON - #endif - #define JUCE_USE_ARM_NEON 0 -#endif - #if JUCE_USE_ARM_NEON #include <arm_neon.h> #endif diff --git a/modules/juce_audio_basics/juce_audio_basics.h b/modules/juce_audio_basics/juce_audio_basics.h index 03c99363f6..8c71dc07ea 100644 --- a/modules/juce_audio_basics/juce_audio_basics.h +++ b/modules/juce_audio_basics/juce_audio_basics.h @@ -55,6 +55,31 @@ #undef Complex // apparently some C libraries actually define these symbols (!) #undef Factor +//============================================================================== +#if JUCE_MINGW && ! defined (__SSE2__) + #define JUCE_USE_SSE_INTRINSICS 0 +#endif + +#ifndef JUCE_USE_SSE_INTRINSICS + #define JUCE_USE_SSE_INTRINSICS 1 +#endif + +#if ! JUCE_INTEL + #undef JUCE_USE_SSE_INTRINSICS +#endif + +#if __ARM_NEON__ && ! (JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON)) + #define JUCE_USE_ARM_NEON 1 +#endif + +#if TARGET_IPHONE_SIMULATOR + #ifdef JUCE_USE_ARM_NEON + #undef JUCE_USE_ARM_NEON + #endif + #define JUCE_USE_ARM_NEON 0 +#endif + +//============================================================================== #include "buffers/juce_AudioDataConverters.h" #include "buffers/juce_FloatVectorOperations.h" #include "buffers/juce_AudioSampleBuffer.h" diff --git a/modules/juce_core/system/juce_TargetPlatform.h b/modules/juce_core/system/juce_TargetPlatform.h index ae9d7e1947..5eb1de8102 100644 --- a/modules/juce_core/system/juce_TargetPlatform.h +++ b/modules/juce_core/system/juce_TargetPlatform.h @@ -172,7 +172,7 @@ #define JUCE_32BIT 1 #endif - #if defined (__arm__) || defined (__arm64__) + #if defined (__arm__) || defined (__arm64__) || defined (__aarch64__) #define JUCE_ARM 1 #elif __MMX__ || __SSE__ || __amd64__ #define JUCE_INTEL 1