/* ============================================================================== This file is part of the JUCE library. Copyright (c) 2017 - ROLI Ltd. JUCE is an open source library subject to commercial or open-source licensing. By using JUCE, you agree to the terms of both the JUCE 5 End-User License Agreement and JUCE 5 Privacy Policy (both updated and effective as of the 27th April 2017). End User License Agreement: www.juce.com/juce-5-licence Privacy Policy: www.juce.com/juce-5-privacy-policy Or: You may also use this code under the terms of the GPL v3 (see www.gnu.org/licenses). JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE DISCLAIMED. ============================================================================== */ namespace juce { namespace dsp { /** A template specialisation to find corresponding mask type for primitives. */ namespace SIMDInternal { template struct MaskTypeFor { typedef Primitive type; }; template <> struct MaskTypeFor { typedef uint32_t type; }; template <> struct MaskTypeFor { typedef uint64_t type; }; template <> struct MaskTypeFor { typedef uint8_t type; }; template <> struct MaskTypeFor { typedef uint8_t type; }; template <> struct MaskTypeFor { typedef uint16_t type; }; template <> struct MaskTypeFor { typedef uint32_t type; }; template <> struct MaskTypeFor { typedef uint64_t type; }; template <> struct MaskTypeFor > { typedef uint32_t type; }; template <> struct MaskTypeFor > { typedef uint64_t type; }; template struct PrimitiveType { typedef Primitive type; }; template struct PrimitiveType> { typedef Primitive type; }; template struct Log2Helper { enum { value = Log2Helper::value + 1 }; }; template <> struct Log2Helper<1> { enum { value = 0 }; }; } /** Useful fallback routines to use if the native SIMD op is not supported. You should never need to use this directly. Use juce_SIMDRegister instead. */ template struct SIMDFallbackOps { static constexpr size_t n = sizeof (vSIMDType) / sizeof (ScalarType); static constexpr size_t mask = (sizeof (vSIMDType) / sizeof (ScalarType)) - 1; static constexpr size_t bits = SIMDInternal::Log2Helper::value; // corresponding mask type typedef typename SIMDInternal::MaskTypeFor::type MaskType; // fallback methods static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return apply (a, b); } static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return apply (a, b); } static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return apply (a, b); } static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return bitapply (a, b); } static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return bitapply (a, b); } static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return bitapply (a, b); } static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return bitapply (a, b); } static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return apply (a, b); } static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return apply (a, b); } static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return cmp (a, b); } static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return cmp (a, b); } static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return cmp (a, b); } static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return cmp (a, b); } static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); auto* aSrc = reinterpret_cast (&a); for (size_t i = 0; i < n; ++i) dst [i] = ~aSrc [i]; return retval; } static forcedinline ScalarType sum (vSIMDType a) noexcept { auto retval = static_cast (0); auto* aSrc = reinterpret_cast (&a); for (size_t i = 0; i < n; ++i) retval += aSrc [i]; return retval; } static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); auto* aSrc = reinterpret_cast (&a); auto* bSrc = reinterpret_cast (&b); auto* cSrc = reinterpret_cast (&c); for (size_t i = 0; i < n; ++i) dst [i] = aSrc [i] + (bSrc [i] * cSrc [i]); return retval; } //============================================================================== static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { auto* aSrc = reinterpret_cast (&a); auto* bSrc = reinterpret_cast (&b); for (size_t i = 0; i < n; ++i) if (aSrc[i] != bSrc[i]) return false; return true; } //============================================================================== static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { vSIMDType retval; auto* dst = reinterpret_cast*> (&retval); auto* aSrc = reinterpret_cast*> (&a); auto* bSrc = reinterpret_cast*> (&b); const int m = n >> 1; for (int i = 0; i < m; ++i) dst [i] = aSrc [i] * bSrc [i]; return retval; } struct ScalarAdd { static forcedinline ScalarType op (ScalarType a, ScalarType b) noexcept { return a + b; } }; struct ScalarSub { static forcedinline ScalarType op (ScalarType a, ScalarType b) noexcept { return a - b; } }; struct ScalarMul { static forcedinline ScalarType op (ScalarType a, ScalarType b) noexcept { return a * b; } }; struct ScalarMin { static forcedinline ScalarType op (ScalarType a, ScalarType b) noexcept { return jmin (a, b); } }; struct ScalarMax { static forcedinline ScalarType op (ScalarType a, ScalarType b) noexcept { return jmax (a, b); } }; struct ScalarAnd { static forcedinline MaskType op (MaskType a, MaskType b) noexcept { return a & b; } }; struct ScalarOr { static forcedinline MaskType op (MaskType a, MaskType b) noexcept { return a | b; } }; struct ScalarXor { static forcedinline MaskType op (MaskType a, MaskType b) noexcept { return a ^ b; } }; struct ScalarNot { static forcedinline MaskType op (MaskType a, MaskType b) noexcept { return (~a) & b; } }; struct ScalarEq { static forcedinline bool op (ScalarType a, ScalarType b) noexcept { return (a == b); } }; struct ScalarNeq { static forcedinline bool op (ScalarType a, ScalarType b) noexcept { return (a != b); } }; struct ScalarGt { static forcedinline bool op (ScalarType a, ScalarType b) noexcept { return (a > b); } }; struct ScalarGeq { static forcedinline bool op (ScalarType a, ScalarType b) noexcept { return (a >= b); } }; // generic apply routines for operations above template static forcedinline vSIMDType apply (vSIMDType a, vSIMDType b) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); auto* aSrc = reinterpret_cast (&a); auto* bSrc = reinterpret_cast (&b); for (size_t i = 0; i < n; ++i) dst [i] = Op::op (aSrc [i], bSrc [i]); return retval; } template static forcedinline vSIMDType cmp (vSIMDType a, vSIMDType b) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); auto* aSrc = reinterpret_cast (&a); auto* bSrc = reinterpret_cast (&b); for (size_t i = 0; i < n; ++i) dst [i] = Op::op (aSrc [i], bSrc [i]) ? static_cast (-1) : static_cast (0); return retval; } template static forcedinline vSIMDType bitapply (vSIMDType a, vSIMDType b) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); auto* aSrc = reinterpret_cast (&a); auto* bSrc = reinterpret_cast (&b); for (size_t i = 0; i < n; ++i) dst [i] = Op::op (aSrc [i], bSrc [i]); return retval; } static forcedinline vSIMDType expand (ScalarType s) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); for (size_t i = 0; i < n; ++i) dst [i] = s; return retval; } static forcedinline vSIMDType load (const ScalarType* a) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); for (size_t i = 0; i < n; ++i) dst [i] = a[i]; return retval; } static forcedinline void store (vSIMDType value, ScalarType* dest) noexcept { const auto* src = reinterpret_cast (&value); for (size_t i = 0; i < n; ++i) dest[i] = src[i]; } template static forcedinline vSIMDType shuffle (vSIMDType a) noexcept { vSIMDType retval; auto* dst = reinterpret_cast (&retval); auto* aSrc = reinterpret_cast (&a); // the compiler will unroll this loop and the index can // be computed at compile-time, so this will be super fast for (size_t i = 0; i < n; ++i) dst [i] = aSrc [(shuffle_idx >> (bits * i)) & mask]; return retval; } }; } // namespace dsp } // namespace juce