From 01935e33388a2c649ca65e8a6ad80c30ad07fd7c Mon Sep 17 00:00:00 2001 From: Tom Poole Date: Tue, 11 Jun 2019 21:23:56 +0100 Subject: [PATCH] DSP: Guarded a rogue FMA instruction in the AVX native ops --- modules/juce_dsp/native/juce_avx_SIMDNativeOps.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h b/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h index 7ecf95b5cc..270660b442 100644 --- a/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h +++ b/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h @@ -91,7 +91,6 @@ struct SIMDNativeOps static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThan (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GT_OQ); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GE_OQ); } static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256 a, __m256 b) noexcept { return (_mm256_movemask_ps (equal (a, b)) == 0xff); } - static forcedinline __m256 JUCE_VECTOR_CALLTYPE multiplyAdd (__m256 a, __m256 b, __m256 c) noexcept { return _mm256_fmadd_ps (b, c, a); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupeven (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupodd (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE swapevenodd (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); } @@ -99,6 +98,15 @@ struct SIMDNativeOps static forcedinline __m256 JUCE_VECTOR_CALLTYPE set (__m256 v, size_t i, float s) noexcept { return SIMDFallbackOps::set (v, i, s); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE truncate (__m256 a) noexcept { return _mm256_cvtepi32_ps (_mm256_cvttps_epi32 (a)); } + static forcedinline __m256 JUCE_VECTOR_CALLTYPE multiplyAdd (__m256 a, __m256 b, __m256 c) noexcept + { + #if __FMA__ + return _mm256_fmadd_ps (b, c, a); + #else + return add (a, mul (b, c)); + #endif + } + static forcedinline __m256 JUCE_VECTOR_CALLTYPE oddevensum (__m256 a) noexcept { a = _mm256_add_ps (_mm256_shuffle_ps (a, a, _MM_SHUFFLE (1, 0, 3, 2)), a);