The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

540 lines
42KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2022 - Raw Material Software Limited
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. By using JUCE, you agree to the terms of both the JUCE 7 End-User License
  8. Agreement and JUCE Privacy Policy.
  9. End User License Agreement: www.juce.com/juce-7-licence
  10. Privacy Policy: www.juce.com/juce-privacy-policy
  11. Or: You may also use this code under the terms of the GPL v3 (see
  12. www.gnu.org/licenses).
  13. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  14. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  15. DISCLAIMED.
  16. ==============================================================================
  17. */
  18. namespace juce::dsp
  19. {
  20. #ifndef DOXYGEN
  21. JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wignored-attributes")
  // Helper macros for 16-byte aligned constant arrays that fill exactly one
  // 128-bit register (16 / sizeof (type) elements):
  //   DECLARE_NEON_SIMD_CONST - declares the array as a static member.
  //   DEFINE_NEON_SIMD_CONST  - opens the matching out-of-class definition for
  //                             SIMDNativeOps<class_type>.
  // MSVC spells the alignment with a leading __declspec (align (16)); every
  // other compiler gets a trailing __attribute__ ((aligned (16))).
  22. #ifdef _MSC_VER
  23. #define DECLARE_NEON_SIMD_CONST(type, name) \
  24. static __declspec (align (16)) const type name [16 / sizeof (type)]
  25. #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
  26. __declspec (align (16)) const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)]
  27. #else
  28. #define DECLARE_NEON_SIMD_CONST(type, name) \
  29. static const type name [16 / sizeof (type)] __attribute__ ((aligned (16)))
  30. #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
  31. const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)] __attribute__ ((aligned (16)))
  32. #endif
  // The primary template is declared but never defined here; only the explicit
  // per-element-type specialisations that follow provide an implementation.
  33. template <typename type>
  34. struct SIMDNativeOps;
  35. //==============================================================================
  /** NEON-backed SIMD primitives for unsigned 32-bit integers.

      All operations work lane-wise on a uint32x4_t. The comparison functions
      return a per-lane mask (the result of the NEON compare intrinsics), not a
      bool; allEqual() reduces such a mask to a single bool.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<uint32_t>
  {
      //==============================================================================
      using vSIMDType = uint32x4_t;
      using fb = SIMDFallbackOps<uint32_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); }
      static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); }
      static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); }
      static forcedinline uint32_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, uint32_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. multiplyAdd computes a + (b * c); truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); }

      // Uses the dedicated bitwise-NOT intrinsic: no constant load and no cast that
      // strips const from kAllBitsSet.
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return vmvnq_u32 (a); }

      //==============================================================================
      // Comparisons. The unsigned compare intrinsics already return uint32x4_t.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return vceqq_u32 (a, b); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return vcgtq_u32 (a, b); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return vcgeq_u32 (a, b); }

      /** Returns true when the horizontal sum of the notEqual() mask is zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          return sum (notEqual (a, b)) == 0;
      }

      /** Returns the (wrapping) sum of all four lanes. */
      static forcedinline uint32_t sum (vSIMDType a) noexcept
      {
          // Fold 4 lanes -> 2 lanes, then a pairwise add leaves the total in lane 0.
          const auto halves = vadd_u32 (vget_high_u32 (a), vget_low_u32 (a));
          const auto total  = vpadd_u32 (halves, halves);
          return vget_lane_u32 (total, 0);
      }
  };
  76. //==============================================================================
  /** NEON-backed SIMD primitives for signed 32-bit integers.

      All operations work lane-wise on an int32x4_t. The comparison functions
      return a per-lane mask (the NEON compare result reinterpreted as int32x4_t),
      not a bool; allEqual() reduces such a mask to a single bool.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<int32_t>
  {
      //==============================================================================
      using vSIMDType = int32x4_t;
      using fb = SIMDFallbackOps<int32_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); }
      static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); }
      static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); }
      static forcedinline int32_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, int32_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. multiplyAdd computes a + (b * c); truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); }

      // Uses the dedicated bitwise-NOT intrinsic: no constant load and no cast that
      // strips const from kAllBitsSet.
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return vmvnq_s32 (a); }

      //==============================================================================
      // Comparisons. The compare intrinsics yield uint32x4_t, which is reinterpreted
      // (bit pattern unchanged) as the signed vector type.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s32_u32 (vceqq_s32 (a, b)); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s32_u32 (vcgtq_s32 (a, b)); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s32_u32 (vcgeq_s32 (a, b)); }

      /** Returns true when the horizontal sum of the notEqual() mask is zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          return sum (notEqual (a, b)) == 0;
      }

      /** Returns the sum of all four lanes. */
      static forcedinline int32_t sum (vSIMDType a) noexcept
      {
          // Fold 4 lanes -> 2 lanes, then a pairwise add leaves the total in lane 0.
          const auto halves = vadd_s32 (vget_high_s32 (a), vget_low_s32 (a));
          const auto total  = vpadd_s32 (halves, halves);
          return vget_lane_s32 (total, 0);
      }
  };
  118. //==============================================================================
  /** NEON-backed SIMD primitives for signed 8-bit integers.

      All operations work lane-wise on an int8x16_t. The comparison functions
      return a per-lane mask (the NEON compare result reinterpreted as int8x16_t),
      not a bool; allEqual() reduces such a mask to a single bool.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<int8_t>
  {
      //==============================================================================
      using vSIMDType = int8x16_t;
      using fb = SIMDFallbackOps<int8_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (int8_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (int8_t s) noexcept { return vdupq_n_s8 (s); }
      static forcedinline vSIMDType load (const int8_t* a) noexcept { return vld1q_s8 (a); }
      static forcedinline void store (vSIMDType value, int8_t* a) noexcept { vst1q_s8 (a, value); }
      static forcedinline int8_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, int8_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. multiplyAdd computes a + (b * c); the horizontal sum defers to
      // the fallback; truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s8 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s8 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s8 (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s8 (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s8 (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s8 (a, b); }
      static forcedinline int8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s8 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s8 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s8 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s8 (b, a); }

      // Uses the dedicated bitwise-NOT intrinsic: no constant load and no cast that
      // strips const from kAllBitsSet.
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return vmvnq_s8 (a); }

      //==============================================================================
      // Comparisons. The compare intrinsics yield uint8x16_t, which is reinterpreted
      // (bit pattern unchanged) as the signed vector type.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s8_u8 (vceqq_s8 (a, b)); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s8_u8 (vcgtq_s8 (a, b)); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s8_u8 (vcgeq_s8 (a, b)); }

      /** Returns true when the notEqual() mask, viewed as four 32-bit lanes, sums to zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          const auto maskAs32 = vreinterpretq_s32_s8 (notEqual (a, b));
          return SIMDNativeOps<int32_t>::sum (maskAs32) == 0;
      }
  };
  155. //==============================================================================
  /** NEON-backed SIMD primitives for unsigned 8-bit integers.

      All operations work lane-wise on a uint8x16_t. The comparison functions
      return a per-lane mask (the result of the NEON compare intrinsics), not a
      bool; allEqual() reduces such a mask to a single bool.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<uint8_t>
  {
      //==============================================================================
      using vSIMDType = uint8x16_t;
      using fb = SIMDFallbackOps<uint8_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (uint8_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (uint8_t s) noexcept { return vdupq_n_u8 (s); }
      static forcedinline vSIMDType load (const uint8_t* a) noexcept { return vld1q_u8 (a); }
      static forcedinline void store (vSIMDType value, uint8_t* a) noexcept { vst1q_u8 (a, value); }
      static forcedinline uint8_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, uint8_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. multiplyAdd computes a + (b * c); the horizontal sum defers to
      // the fallback; truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u8 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u8 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u8 (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u8 (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u8 (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u8 (a, b); }
      static forcedinline uint8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u8 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u8 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u8 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u8 (b, a); }

      // Uses the dedicated bitwise-NOT intrinsic: no constant load and no cast that
      // strips const from kAllBitsSet.
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return vmvnq_u8 (a); }

      //==============================================================================
      // Comparisons. The unsigned compare intrinsics already return uint8x16_t.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return vceqq_u8 (a, b); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return vcgtq_u8 (a, b); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return vcgeq_u8 (a, b); }

      /** Returns true when the notEqual() mask, viewed as four 32-bit lanes, sums to zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          const auto maskAs32 = vreinterpretq_u32_u8 (notEqual (a, b));
          return SIMDNativeOps<uint32_t>::sum (maskAs32) == 0;
      }
  };
  192. //==============================================================================
  /** NEON-backed SIMD primitives for signed 16-bit integers.

      All operations work lane-wise on an int16x8_t. The comparison functions
      return a per-lane mask (the NEON compare result reinterpreted as int16x8_t),
      not a bool; allEqual() reduces such a mask to a single bool.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<int16_t>
  {
      //==============================================================================
      using vSIMDType = int16x8_t;
      using fb = SIMDFallbackOps<int16_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (int16_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (int16_t s) noexcept { return vdupq_n_s16 (s); }
      static forcedinline vSIMDType load (const int16_t* a) noexcept { return vld1q_s16 (a); }
      static forcedinline void store (vSIMDType value, int16_t* a) noexcept { vst1q_s16 (a, value); }
      static forcedinline int16_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, int16_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. multiplyAdd computes a + (b * c); the horizontal sum defers to
      // the fallback; truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s16 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s16 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s16 (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s16 (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s16 (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s16 (a, b); }
      static forcedinline int16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s16 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s16 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s16 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s16 (b, a); }

      // Uses the dedicated bitwise-NOT intrinsic: no constant load and no cast that
      // strips const from kAllBitsSet.
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return vmvnq_s16 (a); }

      //==============================================================================
      // Comparisons. The compare intrinsics yield uint16x8_t, which is reinterpreted
      // (bit pattern unchanged) as the signed vector type.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s16_u16 (vceqq_s16 (a, b)); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s16_u16 (vcgtq_s16 (a, b)); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_s16_u16 (vcgeq_s16 (a, b)); }

      /** Returns true when the notEqual() mask, viewed as four 32-bit lanes, sums to zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          const auto maskAs32 = vreinterpretq_s32_s16 (notEqual (a, b));
          return SIMDNativeOps<int32_t>::sum (maskAs32) == 0;
      }
  };
  229. //==============================================================================
  /** NEON-backed SIMD primitives for unsigned 16-bit integers.

      All operations work lane-wise on a uint16x8_t. The comparison functions
      return a per-lane mask (the result of the NEON compare intrinsics), not a
      bool; allEqual() reduces such a mask to a single bool.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<uint16_t>
  {
      //==============================================================================
      using vSIMDType = uint16x8_t;
      using fb = SIMDFallbackOps<uint16_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (uint16_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (uint16_t s) noexcept { return vdupq_n_u16 (s); }
      static forcedinline vSIMDType load (const uint16_t* a) noexcept { return vld1q_u16 (a); }
      static forcedinline void store (vSIMDType value, uint16_t* a) noexcept { vst1q_u16 (a, value); }
      static forcedinline uint16_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, uint16_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. multiplyAdd computes a + (b * c); the horizontal sum defers to
      // the fallback; truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u16 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u16 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u16 (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u16 (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u16 (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u16 (a, b); }
      static forcedinline uint16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u16 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u16 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u16 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u16 (b, a); }

      // Uses the dedicated bitwise-NOT intrinsic: no constant load and no cast that
      // strips const from kAllBitsSet.
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return vmvnq_u16 (a); }

      //==============================================================================
      // Comparisons. The unsigned compare intrinsics already return uint16x8_t.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return vceqq_u16 (a, b); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return vcgtq_u16 (a, b); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return vcgeq_u16 (a, b); }

      /** Returns true when the notEqual() mask, viewed as four 32-bit lanes, sums to zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          const auto maskAs32 = vreinterpretq_u32_u16 (notEqual (a, b));
          return SIMDNativeOps<uint32_t>::sum (maskAs32) == 0;
      }
  };
  266. //==============================================================================
  /** NEON-backed SIMD primitives for signed 64-bit integers.

      Operates on an int64x2_t. Add, subtract and the bitwise operations use NEON
      intrinsics; multiplication, min/max, comparisons, multiplyAdd and the
      horizontal sum all defer to SIMDFallbackOps.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<int64_t>
  {
      //==============================================================================
      using vSIMDType = int64x2_t;
      using fb = SIMDFallbackOps<int64_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (int64_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (int64_t s) noexcept { return vdupq_n_s64 (s); }
      static forcedinline vSIMDType load (const int64_t* a) noexcept { return vld1q_s64 (a); }
      static forcedinline void store (vSIMDType value, int64_t* a) noexcept { vst1q_s64 (a, value); }
      static forcedinline int64_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, int64_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. Only add/sub are native here; truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s64 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s64 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
      static forcedinline int64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s64 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s64 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s64 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s64 (b, a); }

      /** Bitwise NOT. There is no 64-bit vmvnq, so the register is inverted as
          four 32-bit lanes - lane width is irrelevant to a bitwise complement.
          This avoids loading kAllBitsSet through a cast that strips const.
      */
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept
      {
          return vreinterpretq_s64_s32 (vmvnq_s32 (vreinterpretq_s32_s64 (a)));
      }

      //==============================================================================
      // Comparisons: all provided by the fallback implementation.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }

      /** Returns true when the notEqual() mask, viewed as four 32-bit lanes, sums to zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          const auto maskAs32 = vreinterpretq_s32_s64 (notEqual (a, b));
          return SIMDNativeOps<int32_t>::sum (maskAs32) == 0;
      }
  };
  303. //==============================================================================
  /** NEON-backed SIMD primitives for unsigned 64-bit integers.

      Operates on a uint64x2_t. Add, subtract and the bitwise operations use NEON
      intrinsics; multiplication, min/max, comparisons, multiplyAdd and the
      horizontal sum all defer to SIMDFallbackOps.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<uint64_t>
  {
      //==============================================================================
      using vSIMDType = uint64x2_t;
      using fb = SIMDFallbackOps<uint64_t, vSIMDType>;

      //==============================================================================
      // Not read by anything in this struct. It stays declared so that an
      // out-of-class DEFINE_NEON_SIMD_CONST definition (presumably in the matching
      // .cpp - not visible from here) still has a declaration to match.
      DECLARE_NEON_SIMD_CONST (uint64_t, kAllBitsSet);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (uint64_t s) noexcept { return vdupq_n_u64 (s); }
      static forcedinline vSIMDType load (const uint64_t* a) noexcept { return vld1q_u64 (a); }
      static forcedinline void store (vSIMDType value, uint64_t* a) noexcept { vst1q_u64 (a, value); }
      static forcedinline uint64_t get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, uint64_t s) noexcept { return fb::set (v, i, s); }

      //==============================================================================
      // Arithmetic. Only add/sub are native here; truncate is the identity for integers.
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u64 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u64 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
      static forcedinline uint64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }

      //==============================================================================
      // Bitwise logic. bit_notand (a, b) is (~a) & b, hence the swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u64 (a, b); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u64 (a, b); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u64 (a, b); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u64 (b, a); }

      /** Bitwise NOT. There is no 64-bit vmvnq, so the register is inverted as
          four 32-bit lanes - lane width is irrelevant to a bitwise complement.
          This avoids loading kAllBitsSet through a cast that strips const.
      */
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept
      {
          return vreinterpretq_u64_u32 (vmvnq_u32 (vreinterpretq_u32_u64 (a)));
      }

      //==============================================================================
      // Comparisons: all provided by the fallback implementation.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }

      /** Returns true when the notEqual() mask, viewed as four 32-bit lanes, sums to zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          const auto maskAs32 = vreinterpretq_u32_u64 (notEqual (a, b));
          return SIMDNativeOps<uint32_t>::sum (maskAs32) == 0;
      }
  };
  340. //==============================================================================
  /** NEON-backed SIMD primitives for single-precision floats.

      All operations work lane-wise on a float32x4_t. Bitwise operations and
      comparison masks go through uint32x4_t (vMaskType) using the vreinterpretq
      intrinsics, which change the vector's type but not its bits.

      @tags{DSP}
  */
  template <>
  struct SIMDNativeOps<float>
  {
      //==============================================================================
      using vSIMDType = float32x4_t;
      using vMaskType = uint32x4_t;
      using fb = SIMDFallbackOps<float, vSIMDType>;

      //==============================================================================
      // kAllBitsSet and kOne are not read by anything in this struct; they stay
      // declared so that out-of-class DEFINE_NEON_SIMD_CONST definitions (presumably
      // in the matching .cpp - not visible from here) still have declarations to match.
      DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
      DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
      DECLARE_NEON_SIMD_CONST (float, kOne);

      //==============================================================================
      // Construction and element access. Single-lane get/set defer to the fallback.
      static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); }
      static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); }
      static forcedinline float get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
      static forcedinline vSIMDType set (vSIMDType v, size_t i, float s) noexcept { return fb::set (v, i, s); }
      static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); }

      //==============================================================================
      // Arithmetic. multiplyAdd computes a + (b * c).
      static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); }
      static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); }
      static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); }
      static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); }
      static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); }
      static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); }

      /** Drops the fractional part by converting each lane to int32 and back.
          NOTE(review): lanes outside the int32 range cannot survive that round trip.
      */
      static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return vcvtq_f32_s32 (vcvtq_s32_f32 (a)); }

      //==============================================================================
      // Bitwise logic on the raw lane bits. bit_notand (a, b) is (~a) & b, hence the
      // swapped vbicq operands.
      static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_f32_u32 (vandq_u32 (vreinterpretq_u32_f32 (a), vreinterpretq_u32_f32 (b))); }
      static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_f32_u32 (vorrq_u32 (vreinterpretq_u32_f32 (a), vreinterpretq_u32_f32 (b))); }
      static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (a), vreinterpretq_u32_f32 (b))); }
      static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (b), vreinterpretq_u32_f32 (a))); }

      // Inverts the bits directly instead of reading the int32_t kAllBitsSet array
      // through a float pointer (which dropped const and punned the element type).
      static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return vreinterpretq_f32_u32 (vmvnq_u32 (vreinterpretq_u32_f32 (a))); }

      //==============================================================================
      // Comparisons. The compare intrinsics yield uint32x4_t masks, which are handed
      // back reinterpreted as float vectors.
      static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_f32_u32 (vceqq_f32 (a, b)); }
      static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
      static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_f32_u32 (vcgtq_f32 (a, b)); }
      static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return vreinterpretq_f32_u32 (vcgeq_f32 (a, b)); }

      /** Returns true when the notEqual() mask, summed as four uint32 lanes, is zero. */
      static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
      {
          const auto mask = vreinterpretq_u32_f32 (notEqual (a, b));
          return SIMDNativeOps<uint32_t>::sum (mask) == 0;
      }

      //==============================================================================
      // Lane shuffles built on fb::shuffle. The template argument packs one source
      // lane index per destination lane, two bits each, lowest bits first
      // (presumably decoded that way by SIMDFallbackOps - not visible from here).
      static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); }
      static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); }
      static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); }
      static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); }

      //==============================================================================
      /** Multiplies two vectors of interleaved complex numbers.

          Computes a * dupeven (b) plus swapevenodd (a) * dupodd (b), with the second
          product XOR-ed against kEvenHighBit before the add. The constant's value is
          defined elsewhere; presumably it holds the sign bit in the even lanes, so
          that the even-lane terms are subtracted.
      */
      static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
      {
          const vSIMDType rr_ir = mul (a, dupeven (b));
          const vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b));

          // Load the mask with its declared element type, then reinterpret the
          // register, rather than casting the int32_t array to a float pointer.
          const vSIMDType evenLaneMask = vreinterpretq_f32_s32 (vld1q_s32 (kEvenHighBit));

          return add (rr_ir, bit_xor (ii_ri, evenLaneMask));
      }

      /** Returns the sum of all four lanes. */
      static forcedinline float sum (vSIMDType a) noexcept
      {
          // Fold 4 lanes -> 2 lanes, then a pairwise add leaves the total in lane 0.
          const auto halves = vadd_f32 (vget_high_f32 (a), vget_low_f32 (a));
          const auto total  = vpadd_f32 (halves, halves);
          return vget_lane_f32 (total, 0);
      }
  };
  395. //==============================================================================
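  /** Double-precision SIMD operations.

      Double-precision floating point intrinsics do not exist in 32-bit NEON, so
      on those targets the operations have to be emulated. 64-bit builds
      (JUCE_64BIT) use the native float64x2_t intrinsics instead.

      @tags{DSP}
  */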
  400. #if JUCE_64BIT
  401. template <>
  402. struct SIMDNativeOps<double>
  403. {
  404. //==============================================================================
  405. using vSIMDType = float64x2_t;
  406. using vMaskType = uint64x2_t;
  407. using fb = SIMDFallbackOps<double, vSIMDType>;
  408. //==============================================================================
  409. DECLARE_NEON_SIMD_CONST (int64_t, kAllBitsSet);
  410. DECLARE_NEON_SIMD_CONST (double, kOne);
  411. //==============================================================================
  412. static forcedinline vSIMDType expand (double s) noexcept { return vdupq_n_f64 (s); }
  413. static forcedinline vSIMDType load (const double* a) noexcept { return vld1q_f64 (a); }
  414. static forcedinline double get (vSIMDType v, size_t i) noexcept { return fb::get (v, i); }
  415. static forcedinline vSIMDType set (vSIMDType v, size_t i, double s) noexcept { return fb::set (v, i, s); }
  416. static forcedinline void store (vSIMDType value, double* a) noexcept { vst1q_f64 (a, value); }
  417. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f64 (a, b); }
  418. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f64 (a, b); }
  419. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f64 (a, b); }
  420. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u64 ((vMaskType) a, (vMaskType) b); }
  421. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u64 ((vMaskType) a, (vMaskType) b); }
  422. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u64 ((vMaskType) a, (vMaskType) b); }
  423. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u64 ((vMaskType) b, (vMaskType) a); }
  424. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f64 ((double*) kAllBitsSet)); }
  425. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f64 (a, b); }
  426. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f64 (a, b); }
  427. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f64 (a, b); }
  428. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  429. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f64 (a, b); }
  430. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f64 (a, b); }
  431. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
  432. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f64 (a, b, c); }
  433. static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); }
  434. static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); }
  435. static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
  436. static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return vcvtq_f64_s64 (vcvtq_s64_f64 (a)); }
  437. };
  438. #else
  439. template <>
  440. struct SIMDNativeOps<double>
  441. {
  442. //==============================================================================
  443. using vSIMDType = struct { double v[2]; };
  444. using fb = SIMDFallbackOps<double, vSIMDType>;
  445. static forcedinline vSIMDType expand (double s) noexcept { return {{s, s}}; }
  446. static forcedinline vSIMDType load (const double* a) noexcept { return {{a[0], a[1]}}; }
  447. static forcedinline void store (vSIMDType v, double* a) noexcept { a[0] = v.v[0]; a[1] = v.v[1]; }
  448. static forcedinline double get (vSIMDType v, size_t i) noexcept { return v.v[i]; }
  449. static forcedinline vSIMDType set (vSIMDType v, size_t i, double s) noexcept { v.v[i] = s; return v; }
  450. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return {{a.v[0] + b.v[0], a.v[1] + b.v[1]}}; }
  451. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return {{a.v[0] - b.v[0], a.v[1] - b.v[1]}}; }
  452. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return {{a.v[0] * b.v[0], a.v[1] * b.v[1]}}; }
  453. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); }
  454. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); }
  455. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); }
  456. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); }
  457. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); }
  458. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
  459. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
  460. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
  461. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
  462. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
  463. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
  464. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return fb::allEqual (a, b); }
  465. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
  466. static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); }
  467. static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); }
  468. static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
  469. static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return fb::truncate (a); }
  470. };
  471. #endif // JUCE_64BIT
  472. #endif // #ifndef DOXYGEN
  473. JUCE_END_IGNORE_WARNINGS_GCC_LIKE
  474. } // namespace juce::dsp