Browse Source

Various additions to SIMDRegister

tags/2021-05-28
Zsolt Garamvolgyi jules 8 years ago
parent
commit
8f02179bbf
8 changed files with 250 additions and 135 deletions
  1. +25
    -0
      modules/juce_dsp/containers/juce_SIMDRegister.h
  2. +36
    -1
      modules/juce_dsp/containers/juce_SIMDRegister_test.cpp
  3. +9
    -0
      modules/juce_dsp/native/juce_avx_SIMDNativeOps.h
  4. +13
    -0
      modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h
  5. +155
    -132
      modules/juce_dsp/native/juce_neon_SIMDNativeOps.h
  6. +10
    -0
      modules/juce_dsp/native/juce_sse_SIMDNativeOps.h
  7. +1
    -1
      modules/juce_dsp/processors/juce_FIRFilter.h
  8. +1
    -1
      modules/juce_dsp/processors/juce_FIRFilter_test.cpp

+ 25
- 0
modules/juce_dsp/containers/juce_SIMDRegister.h View File

@@ -102,6 +102,18 @@ struct SIMDRegister
vSIMDType value;
/** Default constructor. */
inline JUCE_VECTOR_CALLTYPE SIMDRegister() noexcept {}
/** Constructs an object from the native SIMD type. */
inline JUCE_VECTOR_CALLTYPE SIMDRegister (vSIMDType a) noexcept : value (a) {}
/** Constructs an object from a scalar type by broadcasting it to all elements. */
inline JUCE_VECTOR_CALLTYPE SIMDRegister (Type s) noexcept { *this = s; }
/** Destructor. */
inline JUCE_VECTOR_CALLTYPE ~SIMDRegister() noexcept {}
//==============================================================================
/** Returns the number of elements in this vector. */
static constexpr size_t size() noexcept { return SIMDNumElements; }
@@ -232,6 +244,19 @@ struct SIMDRegister
/** Returns a vector where each element is the bit-xor'd value of the corresponding element in the receiver and the scalar s.*/
inline SIMDRegister JUCE_VECTOR_CALLTYPE operator^ (MaskType s) const noexcept { return { NativeOps::bit_xor (value, toVecType (s)) }; }
//==============================================================================
/** Returns true if all element-wise comparisons return true, i.e. NativeOps::allEqual
    reports that every element of this register matches the corresponding element of other. */
inline bool JUCE_VECTOR_CALLTYPE operator== (SIMDRegister other) const noexcept { return NativeOps::allEqual (value, other.value); }
/** Returns true if any element-wise comparison returns false. This is exactly the
    logical negation of operator== (SIMDRegister). */
inline bool JUCE_VECTOR_CALLTYPE operator!= (SIMDRegister other) const noexcept { return ! (*this == other); }
/** Returns true if all elements are equal to the scalar. The scalar is first broadcast
    to a full register with expand() and then compared register-to-register. */
inline bool JUCE_VECTOR_CALLTYPE operator== (Type s) const noexcept { return *this == SIMDRegister::expand (s); }
/** Returns true if any elements are not equal to the scalar. This is exactly the
    logical negation of operator== (Type). */
inline bool JUCE_VECTOR_CALLTYPE operator!= (Type s) const noexcept { return ! (*this == s); }
//==============================================================================
/** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
if the corresponding element of a is equal to the corresponding element of b, or zero otherwise.


+ 36
- 1
modules/juce_dsp/containers/juce_SIMDRegister_test.cpp View File

@@ -370,8 +370,10 @@ public:
{
type array_a [SIMDRegister<type>::SIMDNumElements];
union
union ConversionUnion
{
inline ConversionUnion() {}
inline ~ConversionUnion() {}
SIMDRegister<type> floatVersion;
vMaskType intVersion;
} a, b;
@@ -512,6 +514,39 @@ public:
u.expect (vecEqualToArray (le, array_le ));
u.expect (vecEqualToArray (gt, array_gt ));
u.expect (vecEqualToArray (ge, array_ge ));
do
{
SIMDRegister_test_internal::fillRandom (array_a, SIMDRegister<type>::SIMDNumElements, random);
SIMDRegister_test_internal::fillRandom (array_b, SIMDRegister<type>::SIMDNumElements, random);
} while (std::equal (array_a, array_a + SIMDRegister<type>::SIMDNumElements, array_b));
copy (a, array_a);
copy (b, array_b);
u.expect (a != b);
u.expect (b != a);
u.expect (! (a == b));
u.expect (! (b == a));
SIMDRegister_test_internal::fillRandom (array_a, SIMDRegister<type>::SIMDNumElements, random);
copy (a, array_a);
copy (b, array_a);
u.expect (a == b);
u.expect (b == a);
u.expect (! (a != b));
u.expect (! (b != a));
auto scalar = a[0];
a = SIMDRegister<type>::expand (scalar);
u.expect (a == scalar);
u.expect (! (a != scalar));
scalar--;
u.expect (a != scalar);
u.expect (! (a == scalar));
}
}
};


+ 9
- 0
modules/juce_dsp/native/juce_avx_SIMDNativeOps.h View File

@@ -82,6 +82,7 @@ struct SIMDNativeOps<float>
static forcedinline __m256 JUCE_VECTOR_CALLTYPE notEqual (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_NEQ_OQ); }
static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThan (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GT_OQ); }
static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GE_OQ); }
// _mm256_movemask_ps packs the top bit of each of the 8 float lanes into the low 8 bits of an int, so 0xff means every lane of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256 a, __m256 b) noexcept { return (_mm256_movemask_ps (equal (a, b)) == 0xff); }
static forcedinline __m256 JUCE_VECTOR_CALLTYPE multiplyAdd (__m256 a, __m256 b, __m256 c) noexcept { return _mm256_fmadd_ps (b, c, a); }
static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupeven (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupodd (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
@@ -141,6 +142,7 @@ struct SIMDNativeOps<double>
static forcedinline __m256d JUCE_VECTOR_CALLTYPE notEqual (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_NEQ_OQ); }
static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThan (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_GT_OQ); }
static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_GE_OQ); }
// _mm256_movemask_pd packs the top bit of each of the 4 double lanes into the low 4 bits of an int, so 0xf means every lane of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256d a, __m256d b) noexcept { return (_mm256_movemask_pd (equal (a, b)) == 0xf); }
static forcedinline __m256d JUCE_VECTOR_CALLTYPE multiplyAdd (__m256d a, __m256d b, __m256d c) noexcept { return _mm256_add_pd (a, _mm256_mul_pd (b, c)); }
static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupeven (__m256d a) noexcept { return _mm256_shuffle_pd (a, a, 0); }
static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupodd (__m256d a) noexcept { return _mm256_shuffle_pd (a, a, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)); }
@@ -261,6 +263,7 @@ struct SIMDNativeOps<uint8_t>
static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept { return _mm256_cmpeq_epi8 (a, b); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept { return _mm256_cmpgt_epi8 (ssign (a), ssign (b)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// _mm256_movemask_epi8 gathers the top bit of each of the 32 bytes into an int; -1 (all 32 bits set) means every byte of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
@@ -336,6 +339,7 @@ struct SIMDNativeOps<int16_t>
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
// _mm256_movemask_epi8 gathers the top bit of each of the 32 bytes into an int; -1 (all 32 bits set) means every byte of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
//==============================================================================
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int16_t* a) noexcept
@@ -390,6 +394,7 @@ struct SIMDNativeOps<uint16_t>
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
// _mm256_movemask_epi8 gathers the top bit of each of the 32 bytes into an int; -1 (all 32 bits set) means every byte of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
//==============================================================================
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint16_t* a) noexcept
@@ -443,6 +448,7 @@ struct SIMDNativeOps<int32_t>
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
// _mm256_movemask_epi8 gathers the top bit of each of the 32 bytes into an int; -1 (all 32 bits set) means every byte of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
//==============================================================================
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int32_t* a) noexcept
@@ -495,6 +501,7 @@ struct SIMDNativeOps<uint32_t>
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
// _mm256_movemask_epi8 gathers the top bit of each of the 32 bytes into an int; -1 (all 32 bits set) means every byte of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
//==============================================================================
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint32_t* a) noexcept
@@ -543,6 +550,7 @@ struct SIMDNativeOps<int64_t>
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
// _mm256_movemask_epi8 gathers the top bit of each of the 32 bytes into an int; -1 (all 32 bits set) means every byte of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
//==============================================================================
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int64_t* a) noexcept
@@ -614,6 +622,7 @@ struct SIMDNativeOps<uint64_t>
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
// _mm256_movemask_epi8 gathers the top bit of each of the 32 bytes into an int; -1 (all 32 bits set) means every byte of the equal() mask was set.
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
//==============================================================================
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint64_t* a) noexcept


+ 13
- 0
modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h View File

@@ -117,6 +117,19 @@ struct SIMDFallbackOps
return retval;
}
//==============================================================================
/** Scalar fallback: returns true only if each of the n scalar elements of a compares
    equal to the element at the same index in b. */
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // View both registers as plain arrays of scalars so they can be walked element by element.
    const auto* const lhs = reinterpret_cast<const ScalarType*> (&a);
    const auto* const rhs = reinterpret_cast<const ScalarType*> (&b);

    size_t lane = 0;

    // Advance while the elements agree; stopping before n means a mismatch was found.
    while (lane < n && ! (lhs[lane] != rhs[lane]))
        ++lane;

    return ! (lane < n);
}
//==============================================================================
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
{


+ 155
- 132
modules/juce_dsp/native/juce_neon_SIMDNativeOps.h View File

@@ -51,86 +51,82 @@ template <typename type>
struct SIMDNativeOps;
//==============================================================================
/** Single-precision floating point NEON intrinsics. */
/** Unsigned 32-bit integer NEON intrinsics. */
template <>
struct SIMDNativeOps<float>
struct SIMDNativeOps<uint32_t>
{
//==============================================================================
typedef float32x4_t vSIMDType;
typedef uint32x4_t vMaskType;
typedef SIMDFallbackOps<float, vSIMDType> fb;
typedef uint32x4_t vSIMDType;
typedef SIMDFallbackOps<uint32_t, vSIMDType> fb;
//==============================================================================
DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
DECLARE_NEON_SIMD_CONST (float, kOne);
DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);
//==============================================================================
static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); }
static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); }
static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); }
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); }
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); }
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); }
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); }
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); }
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); }
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); }
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); }
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); }
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); }
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); }
static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); }
static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); }
static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); }
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); }
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); }
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); }
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); }
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); }
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); }
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); }
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); }
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); }
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); }
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); }
// Each differing lane of notEqual() is all-ones (the inverted vceqq_u32 result), and one to four such lanes never add up to zero, so a zero sum means every lane matched.
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); }
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); }
static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); }
static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); }
static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); }
static forcedinline float sum (vSIMDType a) noexcept { return fb::sum (a); }
static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); }
//==============================================================================
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); }
static forcedinline uint32_t sum (vSIMDType a) noexcept
{
vSIMDType rr_ir = mul (a, dupeven (b));
vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b));
return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit)));
auto rr = vadd_u32 (vget_high_u32 (a), vget_low_u32 (a));
return vget_lane_u32 (vpadd_u32 (rr, rr), 0);
}
};
//==============================================================================
/** Double-precision floating point intrinsics do not exist in NEON,
so we need to emulate them.
*/
/** Signed 32-bit integer NEON intrinsics. */
template <>
struct SIMDNativeOps<double>
struct SIMDNativeOps<int32_t>
{
//==============================================================================
typedef struct { double values [2]; } vSIMDType;
typedef SIMDFallbackOps<double, vSIMDType> fb;
typedef int32x4_t vSIMDType;
typedef SIMDFallbackOps<int32_t, vSIMDType> fb;
static forcedinline vSIMDType expand (double s) noexcept { return fb::expand (s); }
static forcedinline vSIMDType load (const double* a) noexcept { return fb::load (a); }
static forcedinline void store (vSIMDType value, double* a) noexcept { fb::store (value, a); }
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return fb::add (a, b); }
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return fb::sub (a, b); }
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); }
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); }
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); }
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); }
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); }
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); }
static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); }
static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
//==============================================================================
DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
//==============================================================================
static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); }
static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); }
static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); }
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); }
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); }
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); }
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); }
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); }
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); }
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); }
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); }
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); }
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); }
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); }
// Each differing lane of notEqual() is all-ones (the inverted vceqq_s32 result, i.e. -1), and one to four such lanes never add up to zero, so a zero sum means every lane matched.
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); }
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); }
/** Horizontally adds the four 32-bit lanes of a and returns the scalar total. */
static forcedinline int32_t sum (vSIMDType a) noexcept
{
    // Fold the upper half of the register onto the lower half, then add the two
    // remaining lanes pairwise; lane 0 of that result holds the full total.
    const auto folded = vadd_s32 (vget_high_s32 (a), vget_low_s32 (a));
    return vget_lane_s32 (vpadd_s32 (folded, folded), 0);
}
};
//==============================================================================
@@ -163,6 +159,7 @@ struct SIMDNativeOps<int8_t>
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s8 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s8 (a, b); }
// Views the byte-wise inequality mask as four 32-bit lanes and sums them; mask bytes are 0x00 or 0xFF, so the sum is zero only when no element differed.
// The explicit cast is required because this struct's register type does not convert implicitly to the int32 register type on compilers without lax vector conversions.
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s8 (a, b, c); }
static forcedinline int8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
@@ -197,6 +194,7 @@ struct SIMDNativeOps<uint8_t>
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u8 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u8 (a, b); }
// Views the byte-wise inequality mask as four 32-bit lanes and sums them; mask bytes are 0x00 or 0xFF, so the sum is zero only when no element differed.
// The explicit cast is required because this struct's register type does not convert implicitly to the uint32 register type on compilers without lax vector conversions.
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u8 (a, b, c); }
static forcedinline uint8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
@@ -231,6 +229,7 @@ struct SIMDNativeOps<int16_t>
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s16 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s16 (a, b); }
// Views the 16-bit inequality mask as four 32-bit lanes and sums them; mask elements are 0x0000 or 0xFFFF, so the sum is zero only when no element differed.
// The explicit cast is required because this struct's register type does not convert implicitly to the int32 register type on compilers without lax vector conversions.
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s16 (a, b, c); }
static forcedinline int16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
@@ -266,79 +265,11 @@ struct SIMDNativeOps<uint16_t>
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u16 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u16 (a, b); }
// Views the 16-bit inequality mask as four 32-bit lanes and sums them; mask elements are 0x0000 or 0xFFFF, so the sum is zero only when no element differed.
// The explicit cast is required because this struct's register type does not convert implicitly to the uint32 register type on compilers without lax vector conversions.
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u16 (a, b, c); }
static forcedinline uint16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
//==============================================================================
/** Signed 32-bit integer NEON intrinsics. */
template <>
struct SIMDNativeOps<int32_t>
{
//==============================================================================
typedef int32x4_t vSIMDType;
typedef SIMDFallbackOps<int32_t, vSIMDType> fb;
//==============================================================================
DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
//==============================================================================
static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); }
static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); }
static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); }
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); }
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); }
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); }
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); }
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); }
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); }
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); }
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); }
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); }
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); }
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); }
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); }
static forcedinline int32_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
//==============================================================================
/** Unsigned 32-bit integer NEON intrinsics. */
template <>
struct SIMDNativeOps<uint32_t>
{
//==============================================================================
typedef uint32x4_t vSIMDType;
typedef SIMDFallbackOps<uint32_t, vSIMDType> fb;
//==============================================================================
DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);
//==============================================================================
static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); }
static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); }
static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); }
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); }
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); }
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); }
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); }
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); }
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); }
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); }
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); }
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); }
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); }
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); }
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); }
static forcedinline uint32_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
//==============================================================================
/** Signed 64-bit integer NEON intrinsics. */
template <>
@@ -369,6 +300,7 @@ struct SIMDNativeOps<int64_t>
// Comparisons, multiply-add and sum are forwarded to fb, the fallback
// implementation declared earlier in this struct (outside this excerpt).
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
/** Returns true if no element of a differs from the corresponding element of b.
    The notEqual mask is reinterpreted as 32-bit lanes and summed; presumably every
    differing lane is all ones (-1), so the sum is zero only when nothing differs.
    The cast is explicit because implicit conversion between differently-typed
    NEON vectors is only accepted when lax vector conversions are enabled.
*/
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
static forcedinline int64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
@@ -404,10 +336,101 @@ struct SIMDNativeOps<uint64_t>
// Comparisons, multiply-add and sum are forwarded to fb, the fallback
// implementation declared earlier in this struct (outside this excerpt).
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
/** Returns true if no element of a differs from the corresponding element of b.
    The notEqual mask is reinterpreted as 32-bit lanes and summed; presumably every
    differing lane is 0xffffffff, and up to four of those cannot wrap round to zero,
    so the sum is zero only when nothing differs.
    The cast is explicit because implicit conversion between differently-typed
    NEON vectors is only accepted when lax vector conversions are enabled.
*/
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
static forcedinline uint64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
};
//==============================================================================
/** Single-precision floating point NEON intrinsics.

    All operations act lane-wise on four packed floats (float32x4_t).
    Comparison functions return the NEON bit-mask result reinterpreted as
    vSIMDType, so it can be passed straight to the bit_* operations.
*/
template <>
struct SIMDNativeOps<float>
{
    //==============================================================================
    typedef float32x4_t vSIMDType;
    typedef uint32x4_t vMaskType;
    typedef SIMDFallbackOps<float, vSIMDType> fb;

    //==============================================================================
    // Constants declared through DECLARE_NEON_SIMD_CONST (macro defined elsewhere).
    DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
    DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
    DECLARE_NEON_SIMD_CONST (float, kOne);

    //==============================================================================
    /** Broadcasts s to all four lanes. */
    static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); }
    /** Loads four consecutive floats starting at a. */
    static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); }
    /** Writes the four lanes of value to a. */
    static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); }

    // Lane-wise arithmetic.
    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); }
    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); }
    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); }

    // Bitwise operations: the float lanes are reinterpreted as 32-bit masks,
    // combined, and reinterpreted back.
    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); }
    static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); }
    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); }
    /** Returns (~a) & b. vbicq_u32 clears, in its first operand, the bits set in its second. */
    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); }
    /** Returns ~a, computed as (~a) & kAllBitsSet. */
    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); }

    // Lane-wise minimum / maximum.
    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); }
    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); }

    // Lane-wise comparisons. The NEON compare intrinsics produce a uint32x4_t
    // mask (all ones where the predicate holds, zero otherwise), which is
    // reinterpreted as vSIMDType.
    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); }
    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); }
    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); }

    /** Returns true if every lane of a equals the corresponding lane of b.
        The notEqual mask is summed as four unsigned 32-bit lanes: each differing
        lane contributes 0xffffffff, and up to four of those cannot wrap round to
        zero, so the sum is zero only when no lane differs.
        The mask is cast explicitly to vMaskType, like every other float/mask
        crossing in this struct, because float-to-integer vector conversion is
        not implicit unless lax vector conversions are enabled.
    */
    static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((vMaskType) notEqual (a, b)) == 0); }

    /** Returns a + (b * c) for each lane. */
    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); }

    // Lane permutations used by the complex-number helpers. Assuming fb::shuffle
    // reads two bits per destination lane (lowest bits first) as the source lane
    // index, with a = { a0, a1, a2, a3 }:
    /** { a0, a0, a2, a2 } */
    static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); }
    /** { a1, a1, a3, a3 } */
    static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); }
    /** { a1, a0, a3, a2 } */
    static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); }
    /** { a0 + a2, a1 + a3, a2 + a0, a3 + a1 } */
    static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); }

    //==============================================================================
    /** Multiplies two pairs of interleaved complex numbers { re, im, re, im }.
        For each pair: re = ar*br - ai*bi, im = ai*br + ar*bi. The subtraction is
        done by flipping the sign of the even lanes of ii_ri with an xor against
        kEvenHighBit (presumably the sign bit set in the even lanes only - its
        values are defined elsewhere).
    */
    static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
    {
        vSIMDType rr_ir = mul (a, dupeven (b));              // { ar*br, ai*br, ... }
        vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b)); // { ai*bi, ar*bi, ... }
        return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit)));
    }

    /** Returns the sum of all four lanes. */
    static forcedinline float sum (vSIMDType a) noexcept
    {
        // Fold the high half onto the low half, then add the remaining two lanes pairwise.
        auto rr = vadd_f32 (vget_high_f32 (a), vget_low_f32 (a));
        return vget_lane_f32 (vpadd_f32 (rr, rr), 0);
    }
};
//==============================================================================
/** Double-precision operations for NEON builds.

    No NEON intrinsics are used for doubles here, so the "register" is a plain
    two-element array and every operation is emulated by forwarding to
    SIMDFallbackOps. The single exception is oddevensum, which returns its
    argument unchanged.
*/
template <>
struct SIMDNativeOps<double>
{
    //==============================================================================
    typedef struct { double values [2]; } vSIMDType;
    using fb = SIMDFallbackOps<double, vSIMDType>;

    //==============================================================================
    // Broadcast / memory access.
    static forcedinline vSIMDType expand (double s) noexcept           { return fb::expand (s); }
    static forcedinline vSIMDType load (const double* a) noexcept      { return fb::load (a); }
    static forcedinline void store (vSIMDType value, double* a) noexcept { fb::store (value, a); }

    // Arithmetic.
    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return fb::add (a, b); }
    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return fb::sub (a, b); }
    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }

    // Bitwise operations.
    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept    { return fb::bit_and (a, b); }
    static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept     { return fb::bit_or (a, b); }
    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept    { return fb::bit_xor (a, b); }
    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); }
    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept                 { return fb::bit_not (a); }

    // Minimum / maximum.
    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }

    // Comparisons.
    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept              { return fb::equal (a, b); }
    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept           { return fb::notEqual (a, b); }
    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept        { return fb::greaterThan (a, b); }
    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
    static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept                { return fb::allEqual (a, b); }

    // Compound operations and reductions.
    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
    static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept                 { return fb::cmplxmul (a, b); }
    static forcedinline double sum (vSIMDType a) noexcept                                      { return fb::sum (a); }

    // With only two elements there is a single even and a single odd element,
    // so there is nothing to add together: the input is returned as-is.
    static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
};
#endif
} // namespace dsp


+ 10
- 0
modules/juce_dsp/native/juce_sse_SIMDNativeOps.h View File

@@ -81,6 +81,7 @@ struct SIMDNativeOps<float>
// Lane-wise comparisons on four packed floats; each result lane is a bit mask
// (all ones where the predicate holds, zero otherwise).
static forcedinline __m128 JUCE_VECTOR_CALLTYPE notEqual (__m128 a, __m128 b) noexcept { return _mm_cmpneq_ps (a, b); }
static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThan (__m128 a, __m128 b) noexcept { return _mm_cmpgt_ps (a, b); }
static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128 a, __m128 b) noexcept { return _mm_cmpge_ps (a, b); }
// _mm_movemask_ps packs the top bit of each of the four lanes into bits 0..3,
// so 0xf means every lane of the equal() mask is set, i.e. all lanes matched.
// (equal() is defined elsewhere in this struct, outside this excerpt.)
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128 a, __m128 b ) noexcept { return (_mm_movemask_ps (equal (a, b)) == 0xf); }
// Returns a + (b * c) per lane, as a separate multiply followed by an add.
static forcedinline __m128 JUCE_VECTOR_CALLTYPE multiplyAdd (__m128 a, __m128 b, __m128 c) noexcept { return _mm_add_ps (a, _mm_mul_ps (b, c)); }
// Returns { a0, a0, a2, a2 }.
static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupeven (__m128 a) noexcept { return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
// Returns { a1, a1, a3, a3 }.
static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupodd (__m128 a) noexcept { return _mm_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
@@ -142,6 +143,7 @@ struct SIMDNativeOps<double>
// Lane-wise comparisons on two packed doubles; each result lane is a bit mask
// (all ones where the predicate holds, zero otherwise).
static forcedinline __m128d JUCE_VECTOR_CALLTYPE notEqual (__m128d a, __m128d b) noexcept { return _mm_cmpneq_pd (a, b); }
static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThan (__m128d a, __m128d b) noexcept { return _mm_cmpgt_pd (a, b); }
static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128d a, __m128d b) noexcept { return _mm_cmpge_pd (a, b); }
// _mm_movemask_pd packs the top bit of each of the two lanes into bits 0..1,
// so 0x3 means both lanes of the equal() mask are set, i.e. both lanes matched.
// (equal() is defined elsewhere in this struct, outside this excerpt.)
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128d a, __m128d b ) noexcept { return (_mm_movemask_pd (equal (a, b)) == 0x3); }
// Returns a + (b * c) per lane, as a separate multiply followed by an add.
static forcedinline __m128d JUCE_VECTOR_CALLTYPE multiplyAdd (__m128d a, __m128d b, __m128d c) noexcept { return _mm_add_pd (a, _mm_mul_pd (b, c)); }
// Returns { a0, a0 }.
static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupeven (__m128d a) noexcept { return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 0)); }
// Returns { a1, a1 }.
static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupodd (__m128d a) noexcept { return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (1, 1)); }
@@ -201,6 +203,7 @@ struct SIMDNativeOps<int8_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// _mm_movemask_epi8 gathers the top bit of each of the 16 bytes, so 0xffff means
// every byte of the equal() mask is set, i.e. all elements compared equal.
// (Relies on equal() yielding all-ones elements for matches; it is defined outside this excerpt.)
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int8_t* a) noexcept
@@ -282,6 +285,7 @@ struct SIMDNativeOps<uint8_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// _mm_movemask_epi8 gathers the top bit of each of the 16 bytes, so 0xffff means
// every byte of the equal() mask is set, i.e. all elements compared equal.
// (Relies on equal() yielding all-ones elements for matches; it is defined outside this excerpt.)
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint8_t* a) noexcept
@@ -363,6 +367,7 @@ struct SIMDNativeOps<int16_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// The test is done per byte rather than per 16-bit element: _mm_movemask_epi8
// gathers the top bit of each of the 16 bytes, so 0xffff means every byte of the
// equal() mask is set. This is equivalent as long as equal() yields all-ones
// elements for matches (equal() is defined outside this excerpt).
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int16_t* a) noexcept
@@ -431,6 +436,7 @@ struct SIMDNativeOps<uint16_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// The test is done per byte rather than per 16-bit element: _mm_movemask_epi8
// gathers the top bit of each of the 16 bytes, so 0xffff means every byte of the
// equal() mask is set. This is equivalent as long as equal() yields all-ones
// elements for matches (equal() is defined outside this excerpt).
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint16_t* a) noexcept
@@ -490,6 +496,7 @@ struct SIMDNativeOps<int32_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// The test is done per byte rather than per 32-bit element: _mm_movemask_epi8
// gathers the top bit of each of the 16 bytes, so 0xffff means every byte of the
// equal() mask is set. This is equivalent as long as equal() yields all-ones
// elements for matches (equal() is defined outside this excerpt).
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i value, int32_t* dest) noexcept
@@ -575,6 +582,7 @@ struct SIMDNativeOps<uint32_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// The test is done per byte rather than per 32-bit element: _mm_movemask_epi8
// gathers the top bit of each of the 16 bytes, so 0xffff means every byte of the
// equal() mask is set. This is equivalent as long as equal() yields all-ones
// elements for matches (equal() is defined outside this excerpt).
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint32_t* a) noexcept
@@ -671,6 +679,7 @@ struct SIMDNativeOps<int64_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
// Declared with JUCE_VECTOR_CALLTYPE like the other SSE operations next to it.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// The test is done per byte rather than per 64-bit element: _mm_movemask_epi8
// gathers the top bit of each of the 16 bytes, so 0xffff means every byte of the
// equal() mask is set. This is equivalent as long as equal() yields all-ones
// elements for matches (equal() is defined outside this excerpt).
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i value, int64_t* dest) noexcept
@@ -762,6 +771,7 @@ struct SIMDNativeOps<uint64_t>
// a >= b is composed from this struct's own masks: (a > b) | (a == b).
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
// Returns a + (b * c) using this struct's add and mul.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
// Bitwise complement of the equal() mask.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
// The test is done per byte rather than per 64-bit element: _mm_movemask_epi8
// gathers the top bit of each of the 16 bytes, so 0xffff means every byte of the
// equal() mask is set. This is equivalent as long as equal() yields all-ones
// elements for matches (equal() is defined outside this excerpt).
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
//==============================================================================
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint64_t* a) noexcept


+ 1
- 1
modules/juce_dsp/processors/juce_FIRFilter.h View File

@@ -172,7 +172,7 @@ namespace FIR
static SampleType JUCE_VECTOR_CALLTYPE processSingleSample (SampleType sample, SampleType* buf,
const NumericType* fir, size_t m, size_t& p) noexcept
{
SampleType out = {};
SampleType out (0);
buf[p] = sample;


+ 1
- 1
modules/juce_dsp/processors/juce_FIRFilter_test.cpp View File

@@ -106,7 +106,7 @@ class FIRFilterTest : public UnitTest
buffer[0] = input[i];
SampleType sum{};
SampleType sum (0);
for (size_t j = 0; j < numCoefficients; ++j)
sum += buffer[j] * firCoefficients[j];


Loading…
Cancel
Save