Browse Source

SSE SIMDNativeOps: Reimplement sum for SSE3 to work around an AppleClang bug

With clang 13.0.0 and Apple clang version 13.1.6 (clang-1316.0.21.2),
the following code fails to compile with `-std=c++20 -O3 -msse3`:

    #include <immintrin.h>

    auto test (__m128 a)
    {
        return _mm_hadd_ps (_mm_hadd_ps (a, a), a);
    }
pull/22/head
reuk 3 years ago
parent
commit
970483b1cd
No known key found for this signature in database. GPG Key ID: FCB43929F012EE5C
1 changed file with 5 additions and 3 deletions
  1. +5
    -3
      modules/juce_dsp/native/juce_sse_SIMDNativeOps.h

+ 5
- 3
modules/juce_dsp/native/juce_sse_SIMDNativeOps.h View File

@@ -106,11 +106,13 @@ struct SIMDNativeOps<float>
static forcedinline float JUCE_VECTOR_CALLTYPE sum (__m128 a) noexcept
{
#if defined(__SSE4__)
__m128 retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
const auto retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
#elif defined(__SSE3__)
__m128 retval = _mm_hadd_ps (_mm_hadd_ps (a, a), a);
const auto shuffled = _mm_movehdup_ps (a);
const auto sums = _mm_add_ps (a, shuffled);
const auto retval = _mm_add_ss (sums, _mm_movehl_ps (shuffled, sums));
#else
__m128 retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
auto retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
retval = _mm_add_ps (retval, _mm_shuffle_ps (retval, retval, 0xb1));
#endif
return _mm_cvtss_f32 (retval);


Loading…
Cancel
Save