Browse Source

Fixed an issue where ScopedNoDenormals would do nothing on any platform, and added an ARM implementation

tags/2021-05-28
hogliux 7 years ago
parent
commit
f59a5dfc7f
5 changed files with 119 additions and 45 deletions
  1. +81
    -3
      modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp
  2. +12
    -18
      modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h
  3. +0
    -23
      modules/juce_audio_basics/juce_audio_basics.cpp
  4. +25
    -0
      modules/juce_audio_basics/juce_audio_basics.h
  5. +1
    -1
      modules/juce_core/system/juce_TargetPlatform.h

+ 81
- 3
modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp View File

@@ -1028,20 +1028,98 @@ double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int
#endif
}
/** Reads the floating-point control word used by the denormal-handling helpers.

    Uses _mm_getcsr() on Intel builds with SSE intrinsics, the "fpcr" register on
    64-bit ARM and the "fpscr" register on 32-bit ARM with NEON. On any other
    configuration nothing is read and 0 is returned.
*/
intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
{
    intptr_t registerValue = 0;

   #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
    registerValue = static_cast<intptr_t> (_mm_getcsr());
   #elif defined (__arm64__) || defined (__aarch64__)
    asm volatile("mrs %0, fpcr" : "=r" (registerValue));
   #elif JUCE_USE_ARM_NEON
    asm volatile("vmrs %0, fpscr" : "=r" (registerValue));
   #elif ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
    jassertfalse; // No support for getting the floating point status register for your platform
   #endif

    return registerValue;
}
/** Writes a new value to the floating-point control word.

    Counterpart of getFpStatusRegister(): uses _mm_setcsr() on Intel builds with
    SSE intrinsics, "fpcr" on 64-bit ARM and "fpscr" on 32-bit ARM with NEON.
    On any other configuration the value is ignored.

    @param fpsr  the raw register value to install
*/
void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister (intptr_t fpsr) noexcept
{
   #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
    const auto fpsr_w = static_cast<uint32_t> (fpsr);
    _mm_setcsr (fpsr_w);
   #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
    // Both instructions only accept a general-purpose register as their source
    // operand, so the constraint must be "r": allowing an immediate ("i") could
    // make the compiler emit an instruction that the assembler rejects.
    #if defined (__arm64__) || defined (__aarch64__)
    asm volatile("msr fpcr, %0" : : "r" (fpsr));
    #elif JUCE_USE_ARM_NEON
    asm volatile("vmsr fpscr, %0" : : "r" (fpsr));
    #endif
   #else
    #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
    jassertfalse; // No support for setting the floating point status register for your platform
    #endif
    ignoreUnused (fpsr);
   #endif
}
/** Turns the CPU's flush-to-zero (FZ) bit on or off.

    The bit is changed with a single read-modify-write of the floating-point
    control word, leaving every other bit of the register untouched.

    @param shouldEnable  true to set the FZ bit, false to clear it
*/
void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
   #if JUCE_USE_SSE_INTRINSICS
    const intptr_t mask = _MM_FLUSH_ZERO_MASK;
   #else /*JUCE_USE_ARM_NEON*/
    const intptr_t mask = (1 << 24 /* FZ */);
   #endif

    // One write is enough: clear the bit, then OR it back in if requested.
    setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0));
   #else
    #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
    jassertfalse; // No support for flush to zero mode on your platform
    #endif
    ignoreUnused (shouldEnable);
   #endif
}
/** Sets the control-register bits that make the CPU treat denormals as zero.

    On SSE builds this ORs 0x8040 into the register; on ARM it ORs in the FZ bit.
    The bits are only ever added here, never cleared.
*/
void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
   #if JUCE_USE_SSE_INTRINSICS
    const intptr_t mask = 0x8040;
   #else /*JUCE_USE_ARM_NEON*/
    // Only FZ is wanted. Bits 22 and 23 form the rounding-mode field, and setting
    // them would silently switch the thread to round-towards-zero arithmetic.
    const intptr_t mask = (1 << 24 /* FZ */);
   #endif

    setFpStatusRegister (getFpStatusRegister() | mask);
   #else
    #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
    jassertfalse; // No support for disable denormals mode on your platform
    #endif
   #endif
}
/** Remembers the current floating-point control word in the fpsr member and then
    switches on the denormals-as-zero bits for the lifetime of this object.
*/
ScopedNoDenormals::ScopedNoDenormals() noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
   #if JUCE_USE_SSE_INTRINSICS
    const intptr_t mask = 0x8040; // the DAZ and FZ bits
   #else /*JUCE_USE_ARM_NEON*/
    // Only FZ is wanted. Bits 22 and 23 form the rounding-mode field, and setting
    // them would silently switch the thread to round-towards-zero arithmetic.
    const intptr_t mask = (1 << 24 /* FZ */);
   #endif

    // The register must be captured *before* anything modifies it, otherwise the
    // value saved for later restoration would already contain the new bits.
    fpsr = FloatVectorOperations::getFpStatusRegister();
    FloatVectorOperations::setFpStatusRegister (fpsr | mask);
   #endif
}
/** Writes the value held in the fpsr member back to the floating-point control
    word. Does nothing on builds without SSE or ARM support.
*/
ScopedNoDenormals::~ScopedNoDenormals() noexcept
{
   #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__)
    FloatVectorOperations::setFpStatusRegister (fpsr);
   #endif
}
//==============================================================================


+ 12
- 18
modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h View File

@@ -28,6 +28,7 @@ namespace juce
#else
#define JUCE_SNAP_TO_ZERO(n) ignoreUnused (n)
#endif
class ScopedNoDenormals;
//==============================================================================
/**
@@ -219,6 +220,12 @@ public:
call before audio processing code where you really want to avoid denormalisation performance hits.
*/
static void JUCE_CALLTYPE disableDenormalisedNumberSupport() noexcept;
private:
friend ScopedNoDenormals;
static intptr_t JUCE_CALLTYPE getFpStatusRegister() noexcept;
static void JUCE_CALLTYPE setFpStatusRegister (intptr_t) noexcept;
};
//==============================================================================
@@ -229,26 +236,13 @@ public:
/** Scope guard around the floating-point control word.

    The constructor and destructor are defined out of line; the only state kept
    here is the fpsr member, which exists on builds with SSE or ARM support.
*/
class ScopedNoDenormals
{
public:
    /** Defined out of line in the implementation file. */
    ScopedNoDenormals() noexcept;

    /** Defined out of line in the implementation file. */
    ~ScopedNoDenormals() noexcept;

private:
   #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
    intptr_t fpsr; // control-word value held between construction and destruction
   #endif
};
} // namespace juce

+ 0
- 23
modules/juce_audio_basics/juce_audio_basics.cpp View File

@@ -31,22 +31,10 @@
#include "juce_audio_basics.h"
// MinGW builds without SSE2 (__SSE2__ undefined) switch the SSE code paths off.
#if JUCE_MINGW && ! defined (__SSE2__)
#define JUCE_USE_SSE_INTRINSICS 0
#endif
// MinGW builds that have no alloca macro map it to the compiler builtin.
#if JUCE_MINGW && ! defined (alloca)
#define alloca __builtin_alloca
#endif
// If nothing has defined the flag by this point, SSE intrinsics default to on.
#ifndef JUCE_USE_SSE_INTRINSICS
#define JUCE_USE_SSE_INTRINSICS 1
#endif
// On non-Intel targets the flag is removed altogether, so a later
// "#if JUCE_USE_SSE_INTRINSICS" evaluates as false there.
#if ! JUCE_INTEL
#undef JUCE_USE_SSE_INTRINSICS
#endif
// Pull in the SSE2 intrinsics header only when the SSE code paths are enabled.
#if JUCE_USE_SSE_INTRINSICS
#include <emmintrin.h>
#endif
@@ -61,17 +49,6 @@
#undef JUCE_USE_VDSP_FRAMEWORK
#endif
// Enable the NEON code paths when the compiler reports NEON (__ARM_NEON__),
// unless vDSP is in use or the flag has already been defined.
#if __ARM_NEON__ && ! (JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON))
#define JUCE_USE_ARM_NEON 1
#endif
// When TARGET_IPHONE_SIMULATOR is set, NEON is forced off regardless of any earlier definition.
#if TARGET_IPHONE_SIMULATOR
#ifdef JUCE_USE_ARM_NEON
#undef JUCE_USE_ARM_NEON
#endif
#define JUCE_USE_ARM_NEON 0
#endif
// Pull in the NEON intrinsics header only when the NEON code paths are enabled.
#if JUCE_USE_ARM_NEON
#include <arm_neon.h>
#endif


+ 25
- 0
modules/juce_audio_basics/juce_audio_basics.h View File

@@ -55,6 +55,31 @@
#undef Complex // apparently some C libraries actually define these symbols (!)
#undef Factor
//==============================================================================
// SIMD feature flags. They are set up before the buffers/ headers are included
// below, so those headers see the same values.

// MinGW builds without SSE2 (__SSE2__ undefined) switch the SSE code paths off.
#if JUCE_MINGW && ! defined (__SSE2__)
#define JUCE_USE_SSE_INTRINSICS 0
#endif
// If nothing has defined the flag by this point, SSE intrinsics default to on.
#ifndef JUCE_USE_SSE_INTRINSICS
#define JUCE_USE_SSE_INTRINSICS 1
#endif
// On non-Intel targets the flag is removed altogether, so a later
// "#if JUCE_USE_SSE_INTRINSICS" evaluates as false there.
#if ! JUCE_INTEL
#undef JUCE_USE_SSE_INTRINSICS
#endif
// Enable the NEON code paths when the compiler reports NEON (__ARM_NEON__),
// unless vDSP is in use or the flag has already been defined.
#if __ARM_NEON__ && ! (JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON))
#define JUCE_USE_ARM_NEON 1
#endif
// When TARGET_IPHONE_SIMULATOR is set, NEON is forced off regardless of any earlier definition.
#if TARGET_IPHONE_SIMULATOR
#ifdef JUCE_USE_ARM_NEON
#undef JUCE_USE_ARM_NEON
#endif
#define JUCE_USE_ARM_NEON 0
#endif
//==============================================================================
#include "buffers/juce_AudioDataConverters.h"
#include "buffers/juce_FloatVectorOperations.h"
#include "buffers/juce_AudioSampleBuffer.h"


+ 1
- 1
modules/juce_core/system/juce_TargetPlatform.h View File

@@ -172,7 +172,7 @@
#define JUCE_32BIT 1
#endif
#if defined (__arm__) || defined (__arm64__)
#if defined (__arm__) || defined (__arm64__) || defined (__aarch64__)
#define JUCE_ARM 1
#elif __MMX__ || __SSE__ || __amd64__
#define JUCE_INTEL 1


Loading…
Cancel
Save