@@ -1028,20 +1028,98 @@ double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int | |||||
#endif | #endif | ||||
} | } | ||||
/** Reads the floating-point control word of the current CPU.

    - Intel with SSE intrinsics: the value returned by _mm_getcsr().
    - 64-bit ARM: the FPCR register, read with an MRS instruction.
    - ARM with NEON enabled: the FPSCR register, read with VMRS.

    Any other configuration yields 0, and additionally asserts when the
    target is neither Intel nor ARM.
*/
intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
{
   #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
    return static_cast<intptr_t> (_mm_getcsr());
   #elif defined (__arm64__) || defined (__aarch64__)
    intptr_t controlWord = 0;
    asm volatile("mrs %0, fpcr" : "=r" (controlWord));
    return controlWord;
   #elif JUCE_USE_ARM_NEON
    intptr_t controlWord = 0;
    asm volatile("vmrs %0, fpscr" : "=r" (controlWord));
    return controlWord;
   #else
    #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
     jassertfalse; // No support for getting the floating point status register for your platform
    #endif
    return 0;
   #endif
}
/** Writes a new floating-point control word to the current CPU.

    - Intel with SSE intrinsics: the low 32 bits are passed to _mm_setcsr().
    - 64-bit ARM: the value is written to FPCR with an MSR instruction.
    - ARM with NEON enabled: the value is written to FPSCR with VMSR.

    On any other configuration the argument is ignored, and the function
    asserts when the target is neither Intel nor ARM.

    @param fpsr  the control word to install, normally a (possibly modified)
                 value previously obtained from getFpStatusRegister()
*/
void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister (intptr_t fpsr) noexcept
{
   #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
    auto fpsr_w = static_cast<uint32_t> (fpsr);
    _mm_setcsr (fpsr_w);
   #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
    // Both MSR (to a system register) and VMSR only accept a general-purpose
    // register as their source operand, so the constraint must be "r": allowing
    // an immediate ("i") would let the compiler emit an instruction that cannot
    // be assembled whenever the argument happens to be a compile-time constant.
    #if defined (__arm64__) || defined (__aarch64__)
     asm volatile("msr fpcr, %0" : : "r" (fpsr));
    #elif JUCE_USE_ARM_NEON
     asm volatile("vmsr fpscr, %0" : : "r" (fpsr));
    #endif
   #else
    #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
     jassertfalse; // No support for setting the floating point status register for your platform
    #endif
    ignoreUnused (fpsr);
   #endif
}
/** Switches the CPU's flush-to-zero mode on or off.

    The flush-to-zero bit is the only bit of the floating-point control word
    that is changed: _MM_FLUSH_ZERO_MASK on SSE builds, bit 24 (FZ) on ARM.
    On configurations with neither SSE nor ARM support the call does nothing,
    and asserts when the target is neither Intel nor ARM.

    @param shouldEnable  true to set the flush-to-zero bit, false to clear it
*/
void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
    // The register is updated exactly once, via the read-modify-write below;
    // a separate _MM_SET_FLUSH_ZERO_MODE call would only repeat the same change.
    #if JUCE_USE_SSE_INTRINSICS
     const intptr_t mask = _MM_FLUSH_ZERO_MASK;
    #else /*JUCE_USE_ARM_NEON*/
     const intptr_t mask = (1 << 24 /* FZ */);
    #endif

    setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0));
   #else
    #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
     jassertfalse; // No support for flush to zero mode on your platform
    #endif
    ignoreUnused (shouldEnable);
   #endif
}
/** Turns off denormalised-number handling in the floating-point unit.

    SSE builds set the bits 0x8040 of the control word; ARM builds set bit 24
    (FZ). On configurations with neither, the call does nothing and asserts
    when the target is neither Intel nor ARM.
*/
void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
    #if JUCE_USE_SSE_INTRINSICS
     const intptr_t mask = 0x8040;
    #else /*JUCE_USE_ARM_NEON*/
     // Only the FZ bit is set here. Bits 23:22 hold the rounding mode (RMode),
     // and setting them would silently switch the FPU to round-towards-zero.
     const intptr_t mask = (1 << 24 /* FZ */);
    #endif

    setFpStatusRegister (getFpStatusRegister() | mask);
   #else
    #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
     jassertfalse; // No support for disable denormals mode on your platform
    #endif
   #endif
}
/** Saves the current floating-point control word in the fpsr member and then
    installs a copy with the denormal-disabling bits set.

    SSE builds add the DAZ and FZ bits (0x8040); ARM builds add bit 24 (FZ).
    On configurations with neither, the constructor does nothing.
*/
ScopedNoDenormals::ScopedNoDenormals() noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
    #if JUCE_USE_SSE_INTRINSICS
     const intptr_t mask = 0x8040; // the DAZ and FZ bits
    #else /*JUCE_USE_ARM_NEON*/
     // Only the FZ bit is set here. Bits 23:22 hold the rounding mode (RMode),
     // and setting them would silently switch the FPU to round-towards-zero.
     const intptr_t mask = (1 << 24 /* FZ */);
    #endif

    // The register must be captured before anything modifies it, otherwise the
    // destructor would put back a value that already has the bits set.
    fpsr = FloatVectorOperations::getFpStatusRegister();
    FloatVectorOperations::setFpStatusRegister (fpsr | mask);
   #endif
}
/** Writes the control word held in the fpsr member back to the CPU.

    Does nothing on configurations with neither SSE nor ARM support.
*/
ScopedNoDenormals::~ScopedNoDenormals() noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__)
    FloatVectorOperations::setFpStatusRegister (fpsr);
   #endif
}
//============================================================================== | //============================================================================== | ||||
@@ -28,6 +28,7 @@ namespace juce | |||||
#else | #else | ||||
#define JUCE_SNAP_TO_ZERO(n) ignoreUnused (n) | #define JUCE_SNAP_TO_ZERO(n) ignoreUnused (n) | ||||
#endif | #endif | ||||
class ScopedNoDenormals; | |||||
//============================================================================== | //============================================================================== | ||||
/** | /** | ||||
@@ -219,6 +220,12 @@ public: | |||||
call before audio processing code where you really want to avoid denormalisation performance hits. | call before audio processing code where you really want to avoid denormalisation performance hits. | ||||
*/ | */ | ||||
static void JUCE_CALLTYPE disableDenormalisedNumberSupport() noexcept; | static void JUCE_CALLTYPE disableDenormalisedNumberSupport() noexcept; | ||||
private: | |||||
// ScopedNoDenormals calls the two register accessors below directly.
friend ScopedNoDenormals; | |||||
/** Returns the CPU's floating-point control word: the _mm_getcsr() value on
    SSE builds, FPCR on 64-bit ARM, FPSCR on ARM NEON builds, and 0 elsewhere.
*/
static intptr_t JUCE_CALLTYPE getFpStatusRegister() noexcept; | |||||
/** Writes the given value to the same register that getFpStatusRegister()
    reads; the argument is ignored on configurations without support.
*/
static void JUCE_CALLTYPE setFpStatusRegister (intptr_t) noexcept; | |||||
}; | }; | ||||
//============================================================================== | //============================================================================== | ||||
@@ -229,26 +236,13 @@ public: | |||||
class ScopedNoDenormals
{
public:
    /** Saves the current floating-point control word and then sets the bits
        that disable denormalised-number handling.

        Defined out of line so that this header does not need the platform
        intrinsics.
    */
    ScopedNoDenormals() noexcept;

    /** Restores the floating-point control word saved by the constructor. */
    ~ScopedNoDenormals() noexcept;

private:
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
    // Control word captured by the constructor; only present on
    // configurations where the register can actually be read and written.
    intptr_t fpsr;
   #endif
};
} // namespace juce | } // namespace juce |
@@ -31,22 +31,10 @@ | |||||
#include "juce_audio_basics.h" | #include "juce_audio_basics.h" | ||||
#if JUCE_MINGW && ! defined (__SSE2__) | |||||
#define JUCE_USE_SSE_INTRINSICS 0 | |||||
#endif | |||||
#if JUCE_MINGW && ! defined (alloca) | #if JUCE_MINGW && ! defined (alloca) | ||||
#define alloca __builtin_alloca | #define alloca __builtin_alloca | ||||
#endif | #endif | ||||
#ifndef JUCE_USE_SSE_INTRINSICS | |||||
#define JUCE_USE_SSE_INTRINSICS 1 | |||||
#endif | |||||
#if ! JUCE_INTEL | |||||
#undef JUCE_USE_SSE_INTRINSICS | |||||
#endif | |||||
#if JUCE_USE_SSE_INTRINSICS | #if JUCE_USE_SSE_INTRINSICS | ||||
#include <emmintrin.h> | #include <emmintrin.h> | ||||
#endif | #endif | ||||
@@ -61,17 +49,6 @@ | |||||
#undef JUCE_USE_VDSP_FRAMEWORK | #undef JUCE_USE_VDSP_FRAMEWORK | ||||
#endif | #endif | ||||
#if __ARM_NEON__ && ! (JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON)) | |||||
#define JUCE_USE_ARM_NEON 1 | |||||
#endif | |||||
#if TARGET_IPHONE_SIMULATOR | |||||
#ifdef JUCE_USE_ARM_NEON | |||||
#undef JUCE_USE_ARM_NEON | |||||
#endif | |||||
#define JUCE_USE_ARM_NEON 0 | |||||
#endif | |||||
#if JUCE_USE_ARM_NEON | #if JUCE_USE_ARM_NEON | ||||
#include <arm_neon.h> | #include <arm_neon.h> | ||||
#endif | #endif | ||||
@@ -55,6 +55,31 @@ | |||||
#undef Complex // apparently some C libraries actually define these symbols (!) | #undef Complex // apparently some C libraries actually define these symbols (!) | ||||
#undef Factor | #undef Factor | ||||
//============================================================================== | |||||
// MinGW builds where the compiler does not define __SSE2__ turn the SSE path off.
#if JUCE_MINGW && ! defined (__SSE2__) | |||||
#define JUCE_USE_SSE_INTRINSICS 0 | |||||
#endif | |||||
// If nothing has set the flag by this point, SSE intrinsics default to enabled.
#ifndef JUCE_USE_SSE_INTRINSICS | |||||
#define JUCE_USE_SSE_INTRINSICS 1 | |||||
#endif | |||||
// On non-Intel targets the macro is removed entirely, so a later
// "#if JUCE_USE_SSE_INTRINSICS" evaluates to false there.
#if ! JUCE_INTEL | |||||
#undef JUCE_USE_SSE_INTRINSICS | |||||
#endif | |||||
// NEON is switched on when the compiler defines __ARM_NEON__, unless
// JUCE_USE_VDSP_FRAMEWORK is set or JUCE_USE_ARM_NEON was already defined.
#if __ARM_NEON__ && ! (JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON)) | |||||
#define JUCE_USE_ARM_NEON 1 | |||||
#endif | |||||
// Builds with TARGET_IPHONE_SIMULATOR set always force NEON off,
// overriding any earlier definition.
#if TARGET_IPHONE_SIMULATOR | |||||
#ifdef JUCE_USE_ARM_NEON | |||||
#undef JUCE_USE_ARM_NEON | |||||
#endif | |||||
#define JUCE_USE_ARM_NEON 0 | |||||
#endif | |||||
//============================================================================== | |||||
#include "buffers/juce_AudioDataConverters.h" | #include "buffers/juce_AudioDataConverters.h" | ||||
#include "buffers/juce_FloatVectorOperations.h" | #include "buffers/juce_FloatVectorOperations.h" | ||||
#include "buffers/juce_AudioSampleBuffer.h" | #include "buffers/juce_AudioSampleBuffer.h" | ||||
@@ -172,7 +172,7 @@ | |||||
#define JUCE_32BIT 1 | #define JUCE_32BIT 1 | ||||
#endif | #endif | ||||
#if defined (__arm__) || defined (__arm64__) | |||||
#if defined (__arm__) || defined (__arm64__) || defined (__aarch64__) | |||||
#define JUCE_ARM 1 | #define JUCE_ARM 1 | ||||
#elif __MMX__ || __SSE__ || __amd64__ | #elif __MMX__ || __SSE__ || __amd64__ | ||||
#define JUCE_INTEL 1 | #define JUCE_INTEL 1 | ||||