/*
  ==============================================================================

   This file is part of the JUCE library.
   Copyright (c) 2022 - Raw Material Software Limited

   JUCE is an open source library subject to commercial or open-source
   licensing.

   By using JUCE, you agree to the terms of both the JUCE 7 End-User License
   Agreement and JUCE Privacy Policy.

   End User License Agreement: www.juce.com/juce-7-licence
   Privacy Policy: www.juce.com/juce-privacy-policy

   Or: You may also use this code under the terms of the GPL v3 (see
   www.gnu.org/licenses).

   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
   DISCLAIMED.

  ==============================================================================
*/

namespace juce
{
namespace dsp
{

#ifndef DOXYGEN
// This class is needed internally.
template <typename Scalar>
struct CmplxSIMDOps;
#endif

//==============================================================================
/**
    A wrapper around the platform's native SIMD register type.

    This class is only available on SIMD machines. Use JUCE_USE_SIMD to query
    if SIMD is available for your system.

    SIMDRegister<Type> is a templated class representing the native
    vectorized version of Type. SIMDRegister supports all numerical
    primitive types as well as std::complex<float> and std::complex<double>,
    and supports most operations of the corresponding primitive
    type. Additionally, SIMDRegister can be accessed like an array to extract
    the individual elements.

    If you are using SIMDRegister as a pointer, then you must ensure that the
    memory is sufficiently aligned for SIMD vector operations. Failing to do so
    will result in crashes or very slow code. Use SIMDRegister::isSIMDAligned
    to query if a pointer is sufficiently aligned for SIMD vector operations.

    Note that using SIMDRegister without enabling optimizations will result
    in code with very poor performance.
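
    A minimal usage sketch (assuming JUCE_USE_SIMD is enabled and the juce_dsp
    module is available; the values are purely illustrative):

    @code
    SIMDRegister<float> gain (0.5f);                     // broadcast 0.5f to every lane
    SIMDRegister<float> offset (1.0f);                   // broadcast 1.0f to every lane

    auto mixed = gain * offset + SIMDRegister<float>::expand (0.25f);

    float firstLane = mixed.get (0);                     // every lane now holds 0.75f
    @endcode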

    @tags{DSP}
*/
template <typename Type>
struct SIMDRegister
{
    //==============================================================================
    /** The type that represents the individual constituents of the SIMD Register */
    using ElementType = Type;

    /** STL compatible value_type definition (same as ElementType). */
    using value_type = ElementType;

    /** The corresponding primitive integer type, for example, this will be int32_t
        if type is a float. */
    using MaskType = typename SIMDInternal::MaskTypeFor<ElementType>::type;

    //==============================================================================
    // Here are some types which are needed internally

    /** The native primitive type (used internally). */
    using PrimitiveType = typename SIMDInternal::PrimitiveType<ElementType>::type;

    /** The native operations for this platform and type combination (used internally). */
    using NativeOps = SIMDNativeOps<PrimitiveType>;

    /** The native type (used internally). */
    using vSIMDType = typename NativeOps::vSIMDType;

    /** The corresponding integer SIMDRegister type (used internally). */
    using vMaskType = SIMDRegister<MaskType>;

    /** The internal native type for the corresponding mask type (used internally). */
    using vMaskSIMDType = typename vMaskType::vSIMDType;

    /** Wrapper for operations which need to be handled differently for complex
        and scalar types (used internally). */
    using CmplxOps = CmplxSIMDOps<ElementType>;

    /** Type which is returned when using the subscript operator. The returned type
        should be used just like the type ElementType. */
    struct ElementAccess;

    //==============================================================================
    /** The size in bytes of this register. */
    static constexpr size_t SIMDRegisterSize = sizeof (vSIMDType);

    /** The number of elements that this vector can hold. */
    static constexpr size_t SIMDNumElements = SIMDRegisterSize / sizeof (ElementType);

    vSIMDType value;

    /** Default constructor. */
    inline SIMDRegister() noexcept = default;

    /** Constructs an object from the native SIMD type. */
    inline SIMDRegister (vSIMDType a) noexcept : value (a) {}

    /** Constructs an object from a scalar type by broadcasting it to all elements. */
    inline SIMDRegister (Type s) noexcept { *this = s; }

    //==============================================================================
    /** Returns the number of elements in this vector. */
    static constexpr size_t size() noexcept { return SIMDNumElements; }

    //==============================================================================
    /** Creates a new SIMDRegister from the corresponding scalar primitive.
        The scalar is extended to all elements of the vector. */
    static SIMDRegister JUCE_VECTOR_CALLTYPE expand (ElementType s) noexcept { return {CmplxOps::expand (s)}; }

    /** Creates a new SIMDRegister from the internal SIMD type (for example
        __m128 for single-precision floating point on SSE architectures). */
    static SIMDRegister JUCE_VECTOR_CALLTYPE fromNative (vSIMDType a) noexcept { return {a}; }

    /** Creates a new SIMDRegister from the first SIMDNumElements of a scalar array. */
    static SIMDRegister JUCE_VECTOR_CALLTYPE fromRawArray (const ElementType* a) noexcept
    {
        jassert (isSIMDAligned (a));
        return {CmplxOps::load (a)};
    }

    /** Copies the elements of the SIMDRegister to a scalar array in memory. */
    inline void JUCE_VECTOR_CALLTYPE copyToRawArray (ElementType* a) const noexcept
    {
        jassert (isSIMDAligned (a));
        CmplxOps::store (value, a);
    }
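
    /*  Sketch: streaming a float buffer through SIMDRegister with the raw-array
        helpers. Illustrative only; it assumes the buffer is SIMD-aligned (e.g. it
        came from an aligned allocator) and that numSamples is a multiple of size().

        @code
        void applyGain (float* data, size_t numSamples, float gain)
        {
            using Reg = SIMDRegister<float>;
            jassert (Reg::isSIMDAligned (data) && numSamples % Reg::size() == 0);

            for (size_t i = 0; i < numSamples; i += Reg::size())
            {
                auto r = Reg::fromRawArray (data + i);   // aligned load
                r *= gain;                               // per-lane multiply
                r.copyToRawArray (data + i);             // aligned store
            }
        }
        @endcode
    */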

    //==============================================================================
    /** Returns the idx-th element of the receiver. Note that this does not check if idx
        is larger than the native register size. */
    inline ElementType JUCE_VECTOR_CALLTYPE get (size_t idx) const noexcept
    {
        jassert (idx < SIMDNumElements);
        return CmplxOps::get (value, idx);
    }

    /** Sets the idx-th element of the receiver. Note that this does not check if idx
        is larger than the native register size. */
    inline void JUCE_VECTOR_CALLTYPE set (size_t idx, ElementType v) noexcept
    {
        jassert (idx < SIMDNumElements);
        value = CmplxOps::set (value, idx, v);
    }

    //==============================================================================
    /** Returns the idx-th element of the receiver. Note that this does not check if idx
        is larger than the native register size. */
    inline ElementType JUCE_VECTOR_CALLTYPE operator[] (size_t idx) const noexcept
    {
        return get (idx);
    }

    /** Returns the idx-th element of the receiver. Note that this does not check if idx
        is larger than the native register size. */
    inline ElementAccess JUCE_VECTOR_CALLTYPE operator[] (size_t idx) noexcept
    {
        jassert (idx < SIMDNumElements);
        return ElementAccess (*this, idx);
    }
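
    /*  Sketch of element access (illustrative): the non-const operator[] returns an
        ElementAccess proxy that behaves like ElementType, while get() / set() take
        explicit indices. Bounds are only checked by jassert in debug builds.

        @code
        SIMDRegister<float> r (0.0f);
        r[0] = 42.0f;                          // write through the ElementAccess proxy
        float first = r[0];                    // read back through the proxy
        r.set (r.size() - 1, first * 0.5f);    // equivalent explicit set()
        @endcode
    */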

    //==============================================================================
    /** Adds another SIMDRegister to the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator+= (SIMDRegister v) noexcept { value = NativeOps::add (value, v.value); return *this; }

    /** Subtracts another SIMDRegister from the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator-= (SIMDRegister v) noexcept { value = NativeOps::sub (value, v.value); return *this; }

    /** Multiplies the receiver by another SIMDRegister. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator*= (SIMDRegister v) noexcept { value = CmplxOps::mul (value, v.value); return *this; }

    //==============================================================================
    /** Broadcasts the scalar to all elements of the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator= (ElementType s) noexcept { value = CmplxOps::expand (s); return *this; }

    /** Adds a scalar to the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator+= (ElementType s) noexcept { value = NativeOps::add (value, CmplxOps::expand (s)); return *this; }

    /** Subtracts a scalar from the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator-= (ElementType s) noexcept { value = NativeOps::sub (value, CmplxOps::expand (s)); return *this; }

    /** Multiplies the receiver by a scalar. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator*= (ElementType s) noexcept { value = CmplxOps::mul (value, CmplxOps::expand (s)); return *this; }

    //==============================================================================
    /** Bit-and the receiver with SIMDRegister v and store the result in the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator&= (vMaskType v) noexcept { value = NativeOps::bit_and (value, toVecType (v.value)); return *this; }

    /** Bit-or the receiver with SIMDRegister v and store the result in the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator|= (vMaskType v) noexcept { value = NativeOps::bit_or (value, toVecType (v.value)); return *this; }

    /** Bit-xor the receiver with SIMDRegister v and store the result in the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator^= (vMaskType v) noexcept { value = NativeOps::bit_xor (value, toVecType (v.value)); return *this; }

    //==============================================================================
    /** Bit-and each element of the receiver with the scalar s and store the result in the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator&= (MaskType s) noexcept { value = NativeOps::bit_and (value, toVecType (s)); return *this; }

    /** Bit-or each element of the receiver with the scalar s and store the result in the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator|= (MaskType s) noexcept { value = NativeOps::bit_or (value, toVecType (s)); return *this; }

    /** Bit-xor each element of the receiver with the scalar s and store the result in the receiver. */
    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator^= (MaskType s) noexcept { value = NativeOps::bit_xor (value, toVecType (s)); return *this; }

    //==============================================================================
    /** Returns the sum of the receiver and v. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator+ (SIMDRegister v) const noexcept { return { NativeOps::add (value, v.value) }; }

    /** Returns the difference of the receiver and v. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator- (SIMDRegister v) const noexcept { return { NativeOps::sub (value, v.value) }; }

    /** Returns the product of the receiver and v. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator* (SIMDRegister v) const noexcept { return { CmplxOps::mul (value, v.value) }; }

    //==============================================================================
    /** Returns a vector where each element is the sum of the corresponding element in the receiver and the scalar s. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator+ (ElementType s) const noexcept { return { NativeOps::add (value, CmplxOps::expand (s)) }; }

    /** Returns a vector where each element is the difference of the corresponding element in the receiver and the scalar s. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator- (ElementType s) const noexcept { return { NativeOps::sub (value, CmplxOps::expand (s)) }; }

    /** Returns a vector where each element is the product of the corresponding element in the receiver and the scalar s. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator* (ElementType s) const noexcept { return { CmplxOps::mul (value, CmplxOps::expand (s)) }; }

    //==============================================================================
    /** Returns the bit-and of the receiver and v. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator& (vMaskType v) const noexcept { return { NativeOps::bit_and (value, toVecType (v.value)) }; }

    /** Returns the bit-or of the receiver and v. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator| (vMaskType v) const noexcept { return { NativeOps::bit_or (value, toVecType (v.value)) }; }

    /** Returns the bit-xor of the receiver and v. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator^ (vMaskType v) const noexcept { return { NativeOps::bit_xor (value, toVecType (v.value)) }; }

    /** Returns a vector where each element is the bit-inverted value of the corresponding element in the receiver. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator~() const noexcept { return { NativeOps::bit_not (value) }; }

    //==============================================================================
    /** Returns a vector where each element is the bit-and'd value of the corresponding element in the receiver and the scalar s. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator& (MaskType s) const noexcept { return { NativeOps::bit_and (value, toVecType (s)) }; }

    /** Returns a vector where each element is the bit-or'd value of the corresponding element in the receiver and the scalar s. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator| (MaskType s) const noexcept { return { NativeOps::bit_or (value, toVecType (s)) }; }

    /** Returns a vector where each element is the bit-xor'd value of the corresponding element in the receiver and the scalar s. */
    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator^ (MaskType s) const noexcept { return { NativeOps::bit_xor (value, toVecType (s)) }; }

    //==============================================================================
    /** Returns true if all element-wise comparisons return true. */
    inline bool JUCE_VECTOR_CALLTYPE operator== (SIMDRegister other) const noexcept { return NativeOps::allEqual (value, other.value); }

    /** Returns true if any element-wise comparisons return false. */
    inline bool JUCE_VECTOR_CALLTYPE operator!= (SIMDRegister other) const noexcept { return ! (*this == other); }

    /** Returns true if all elements are equal to the scalar. */
    inline bool JUCE_VECTOR_CALLTYPE operator== (Type s) const noexcept { return *this == SIMDRegister::expand (s); }

    /** Returns true if any elements are not equal to the scalar. */
    inline bool JUCE_VECTOR_CALLTYPE operator!= (Type s) const noexcept { return ! (*this == s); }

    //==============================================================================
    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
        if the corresponding element of a is equal to the corresponding element of b, or zero otherwise.
        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
    static vMaskType JUCE_VECTOR_CALLTYPE equal (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::equal (a.value, b.value)); }

    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
        if the corresponding element of a is not equal to the corresponding element of b, or zero otherwise.
        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
    static vMaskType JUCE_VECTOR_CALLTYPE notEqual (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::notEqual (a.value, b.value)); }

    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
        if the corresponding element of a is less than the corresponding element of b, or zero otherwise.
        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
    static vMaskType JUCE_VECTOR_CALLTYPE lessThan (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThan (b.value, a.value)); }

    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
        if the corresponding element of a is less than or equal to the corresponding element of b, or zero otherwise.
        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
    static vMaskType JUCE_VECTOR_CALLTYPE lessThanOrEqual (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThanOrEqual (b.value, a.value)); }

    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
        if the corresponding element of a is greater than the corresponding element of b, or zero otherwise.
        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
    static vMaskType JUCE_VECTOR_CALLTYPE greaterThan (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThan (a.value, b.value)); }

    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
        if the corresponding element of a is greater than or equal to the corresponding element of b, or zero otherwise.
        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
    static vMaskType JUCE_VECTOR_CALLTYPE greaterThanOrEqual (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThanOrEqual (a.value, b.value)); }
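
    /*  Sketch: branch-free gating with a comparison mask (illustrative). Each lane of
        the returned mask is either all-ones or all-zeros, so AND-ing it with a float
        register keeps or zeroes the corresponding lanes without any branches.

        @code
        using Reg = SIMDRegister<float>;

        Reg  input (0.001f);
        auto mask  = Reg::greaterThanOrEqual (Reg::abs (input), Reg::expand (0.01f));
        auto gated = input & mask;   // lanes whose magnitude is below 0.01f become 0
        @endcode
    */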

    //==============================================================================
    /** Returns a new vector where each element is the minimum of the corresponding element of a and b. */
    static SIMDRegister JUCE_VECTOR_CALLTYPE min (SIMDRegister a, SIMDRegister b) noexcept { return { NativeOps::min (a.value, b.value) }; }

    /** Returns a new vector where each element is the maximum of the corresponding element of a and b. */
    static SIMDRegister JUCE_VECTOR_CALLTYPE max (SIMDRegister a, SIMDRegister b) noexcept { return { NativeOps::max (a.value, b.value) }; }

    //==============================================================================
    /** Multiplies b and c and adds the result to a. */
    static SIMDRegister JUCE_VECTOR_CALLTYPE multiplyAdd (SIMDRegister a, const SIMDRegister b, SIMDRegister c) noexcept
    {
        return { CmplxOps::muladd (a.value, b.value, c.value) };
    }

    //==============================================================================
    /** Returns a scalar which is the sum of all elements of the receiver. */
    inline ElementType sum() const noexcept { return CmplxOps::sum (value); }
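
    /*  Sketch: a dot product over two buffers using multiplyAdd() and sum().
        Illustrative only; assumes both pointers are SIMD-aligned and numSamples
        is a multiple of size().

        @code
        float dotProduct (const float* a, const float* b, size_t numSamples)
        {
            using Reg = SIMDRegister<float>;
            Reg acc (0.0f);

            for (size_t i = 0; i < numSamples; i += Reg::size())
                acc = Reg::multiplyAdd (acc, Reg::fromRawArray (a + i),
                                             Reg::fromRawArray (b + i));

            return acc.sum();   // horizontal add of the per-lane partial sums
        }
        @endcode
    */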

    //==============================================================================
    /** Truncates each element to its integer part.
        Effectively discards the fractional part of each element, i.e. rounds towards zero. */
    static SIMDRegister JUCE_VECTOR_CALLTYPE truncate (SIMDRegister a) noexcept { return { NativeOps::truncate (a.value) }; }

    //==============================================================================
    /** Returns the absolute value of each element. */
    static SIMDRegister JUCE_VECTOR_CALLTYPE abs (SIMDRegister a) noexcept
    {
        // Branch-free: for negative elements the mask selects 2 * a, so a - 2 * a == -a;
        // for non-negative elements the mask is zero and a is returned unchanged.
        return a - (a * (expand (ElementType (2)) & lessThan (a, expand (ElementType (0)))));
    }

    //==============================================================================
    /** Checks if the given pointer is sufficiently aligned for using SIMD operations. */
    static bool isSIMDAligned (const ElementType* ptr) noexcept
    {
        uintptr_t bitmask = SIMDRegisterSize - 1;
        return (reinterpret_cast<uintptr_t> (ptr) & bitmask) == 0;
    }

    /** Returns the next position in memory where isSIMDAligned returns true.

        If the current position in memory is already aligned then this method
        will simply return the pointer.
    */
    static ElementType* getNextSIMDAlignedPtr (ElementType* ptr) noexcept
    {
        return snapPointerToAlignment (ptr, SIMDRegisterSize);
    }
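
    /*  Sketch: aligning the start of a buffer before vectorising (illustrative;
        processScalar is a hypothetical helper for the samples that fall outside
        the aligned, vector-sized region).

        @code
        void process (float* data, size_t numSamples)
        {
            using Reg = SIMDRegister<float>;

            auto* aligned = Reg::getNextSIMDAlignedPtr (data);
            auto  head    = jmin ((size_t) (aligned - data), numSamples);

            processScalar (data, head);                    // unaligned head, one by one

            size_t i = head;
            for (; i + Reg::size() <= numSamples; i += Reg::size())
            {
                auto r = Reg::fromRawArray (data + i);
                // ... per-lane work on r ...
                r.copyToRawArray (data + i);
            }

            processScalar (data + i, numSamples - i);      // leftover tail, one by one
        }
        @endcode
    */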

private:
    static vMaskType JUCE_VECTOR_CALLTYPE toMaskType (vSIMDType a) noexcept
    {
        union
        {
            vSIMDType in;
            vMaskSIMDType out;
        } u;

        u.in = a;
        return vMaskType::fromNative (u.out);
    }

    static vSIMDType JUCE_VECTOR_CALLTYPE toVecType (vMaskSIMDType a) noexcept
    {
        union
        {
            vMaskSIMDType in;
            vSIMDType out;
        } u;

        u.in = a;
        return u.out;
    }

    static vSIMDType JUCE_VECTOR_CALLTYPE toVecType (MaskType a) noexcept
    {
        union
        {
            vMaskSIMDType in;
            vSIMDType out;
        } u;

        u.in = CmplxSIMDOps<MaskType>::expand (a);
        return u.out;
    }
};

} // namespace dsp
} // namespace juce

#ifndef DOXYGEN
#include "juce_SIMDRegister_Impl.h"
#endif