The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

467 lines
32KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. By using JUCE, you agree to the terms of both the JUCE 5 End-User License
  8. Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
  9. 27th April 2017).
  10. End User License Agreement: www.juce.com/juce-5-licence
  11. Privacy Policy: www.juce.com/juce-5-privacy-policy
  12. Or: You may also use this code under the terms of the GPL v3 (see
  13. www.gnu.org/licenses).
  14. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  15. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  16. DISCLAIMED.
  17. ==============================================================================
  18. */
  19. namespace juce
  20. {
  21. namespace dsp
  22. {
  23. #ifndef DOXYGEN
  24. #ifdef _MSC_VER
  25. #define DECLARE_NEON_SIMD_CONST(type, name) \
  26. static __declspec(align(16)) const type name [16 / sizeof (type)]
  27. #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
  28. __declspec(align(16)) const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)]
  29. #else
  30. #define DECLARE_NEON_SIMD_CONST(type, name) \
  31. static const type name [16 / sizeof (type)] __attribute__((aligned(16)))
  32. #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
  33. const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)] __attribute__((aligned(16)))
  34. #endif
  35. template <typename type>
  36. struct SIMDNativeOps;
  37. //==============================================================================
  38. /** Unsigned 32-bit integer NEON intrinsics.
  39. @tags{DSP}
  40. */
  41. template <>
  42. struct SIMDNativeOps<uint32_t>
  43. {
  44. //==============================================================================
  45. typedef uint32x4_t vSIMDType;
  46. typedef SIMDFallbackOps<uint32_t, vSIMDType> fb;
  47. //==============================================================================
  48. DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);
  49. //==============================================================================
  50. static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); }
  51. static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); }
  52. static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); }
  53. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); }
  54. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); }
  55. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); }
  56. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); }
  57. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); }
  58. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); }
  59. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); }
  60. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); }
  61. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); }
  62. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); }
  63. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); }
  64. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); }
  65. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  66. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); }
  67. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); }
  68. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); }
  69. static forcedinline uint32_t sum (vSIMDType a) noexcept
  70. {
  71. auto rr = vadd_u32 (vget_high_u32 (a), vget_low_u32 (a));
  72. return vget_lane_u32 (vpadd_u32 (rr, rr), 0);
  73. }
  74. };
  75. //==============================================================================
  76. /** Signed 32-bit integer NEON intrinsics.
  77. @tags{DSP}
  78. */
  79. template <>
  80. struct SIMDNativeOps<int32_t>
  81. {
  82. //==============================================================================
  83. typedef int32x4_t vSIMDType;
  84. typedef SIMDFallbackOps<int32_t, vSIMDType> fb;
  85. //==============================================================================
  86. DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
  87. //==============================================================================
  88. static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); }
  89. static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); }
  90. static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); }
  91. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); }
  92. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); }
  93. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); }
  94. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); }
  95. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); }
  96. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); }
  97. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); }
  98. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); }
  99. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); }
  100. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); }
  101. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); }
  102. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); }
  103. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  104. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); }
  105. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); }
  106. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); }
  107. static forcedinline int32_t sum (vSIMDType a) noexcept
  108. {
  109. auto rr = vadd_s32 (vget_high_s32 (a), vget_low_s32 (a));
  110. rr = vpadd_s32 (rr, rr);
  111. return vget_lane_s32 (rr, 0);
  112. }
  113. };
  114. //==============================================================================
  115. /** Signed 8-bit integer NEON intrinsics.
  116. @tags{DSP}
  117. */
  118. template <>
  119. struct SIMDNativeOps<int8_t>
  120. {
  121. //==============================================================================
  122. typedef int8x16_t vSIMDType;
  123. typedef SIMDFallbackOps<int8_t, vSIMDType> fb;
  124. //==============================================================================
  125. DECLARE_NEON_SIMD_CONST (int8_t, kAllBitsSet);
  126. //==============================================================================
  127. static forcedinline vSIMDType expand (int8_t s) noexcept { return vdupq_n_s8 (s); }
  128. static forcedinline vSIMDType load (const int8_t* a) noexcept { return vld1q_s8 (a); }
  129. static forcedinline void store (vSIMDType value, int8_t* a) noexcept { vst1q_s8 (a, value); }
  130. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s8 (a, b); }
  131. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s8 (a, b); }
  132. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s8 (a, b); }
  133. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s8 (a, b); }
  134. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s8 (a, b); }
  135. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s8 (a, b); }
  136. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s8 (b, a); }
  137. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s8 ((int8_t*) kAllBitsSet)); }
  138. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s8 (a, b); }
  139. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s8 (a, b); }
  140. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s8 (a, b); }
  141. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  142. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s8 (a, b); }
  143. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s8 (a, b); }
  144. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum (notEqual (a, b)) == 0); }
  145. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s8 (a, b, c); }
  146. static forcedinline int8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
  147. };
  148. //==============================================================================
  149. /** Unsigned 8-bit integer NEON intrinsics.
  150. @tags{DSP}
  151. */
  152. template <>
  153. struct SIMDNativeOps<uint8_t>
  154. {
  155. //==============================================================================
  156. typedef uint8x16_t vSIMDType;
  157. typedef SIMDFallbackOps<uint8_t, vSIMDType> fb;
  158. //==============================================================================
  159. DECLARE_NEON_SIMD_CONST (uint8_t, kAllBitsSet);
  160. //==============================================================================
  161. static forcedinline vSIMDType expand (uint8_t s) noexcept { return vdupq_n_u8 (s); }
  162. static forcedinline vSIMDType load (const uint8_t* a) noexcept { return vld1q_u8 (a); }
  163. static forcedinline void store (vSIMDType value, uint8_t* a) noexcept { vst1q_u8 (a, value); }
  164. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u8 (a, b); }
  165. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u8 (a, b); }
  166. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u8 (a, b); }
  167. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u8 (a, b); }
  168. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u8 (a, b); }
  169. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u8 (a, b); }
  170. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u8 (b, a); }
  171. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u8 ((uint8_t*) kAllBitsSet)); }
  172. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u8 (a, b); }
  173. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u8 (a, b); }
  174. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u8 (a, b); }
  175. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  176. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u8 (a, b); }
  177. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u8 (a, b); }
  178. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum (notEqual (a, b)) == 0); }
  179. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u8 (a, b, c); }
  180. static forcedinline uint8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
  181. };
  182. //==============================================================================
  183. /** Signed 16-bit integer NEON intrinsics.
  184. @tags{DSP}
  185. */
  186. template <>
  187. struct SIMDNativeOps<int16_t>
  188. {
  189. //==============================================================================
  190. typedef int16x8_t vSIMDType;
  191. typedef SIMDFallbackOps<int16_t, vSIMDType> fb;
  192. //==============================================================================
  193. DECLARE_NEON_SIMD_CONST (int16_t, kAllBitsSet);
  194. //==============================================================================
  195. static forcedinline vSIMDType expand (int16_t s) noexcept { return vdupq_n_s16 (s); }
  196. static forcedinline vSIMDType load (const int16_t* a) noexcept { return vld1q_s16 (a); }
  197. static forcedinline void store (vSIMDType value, int16_t* a) noexcept { vst1q_s16 (a, value); }
  198. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s16 (a, b); }
  199. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s16 (a, b); }
  200. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s16 (a, b); }
  201. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s16 (a, b); }
  202. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s16 (a, b); }
  203. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s16 (a, b); }
  204. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s16 (b, a); }
  205. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s16 ((int16_t*) kAllBitsSet)); }
  206. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s16 (a, b); }
  207. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s16 (a, b); }
  208. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s16 (a, b); }
  209. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  210. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s16 (a, b); }
  211. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s16 (a, b); }
  212. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum (notEqual (a, b)) == 0); }
  213. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s16 (a, b, c); }
  214. static forcedinline int16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
  215. };
  216. //==============================================================================
  217. /** Unsigned 16-bit integer NEON intrinsics.
  218. @tags{DSP}
  219. */
  220. template <>
  221. struct SIMDNativeOps<uint16_t>
  222. {
  223. //==============================================================================
  224. typedef uint16x8_t vSIMDType;
  225. typedef SIMDFallbackOps<uint16_t, vSIMDType> fb;
  226. //==============================================================================
  227. DECLARE_NEON_SIMD_CONST (uint16_t, kAllBitsSet);
  228. //==============================================================================
  229. static forcedinline vSIMDType expand (uint16_t s) noexcept { return vdupq_n_u16 (s); }
  230. static forcedinline vSIMDType load (const uint16_t* a) noexcept { return vld1q_u16 (a); }
  231. static forcedinline void store (vSIMDType value, uint16_t* a) noexcept { vst1q_u16 (a, value); }
  232. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u16 (a, b); }
  233. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u16 (a, b); }
  234. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u16 (a, b); }
  235. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u16 (a, b); }
  236. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u16 (a, b); }
  237. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u16 (a, b); }
  238. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u16 (b, a); }
  239. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u16 ((uint16_t*) kAllBitsSet)); }
  240. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u16 (a, b); }
  241. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u16 (a, b); }
  242. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u16 (a, b); }
  243. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  244. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u16 (a, b); }
  245. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u16 (a, b); }
  246. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum (notEqual (a, b)) == 0); }
  247. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u16 (a, b, c); }
  248. static forcedinline uint16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
  249. };
  250. //==============================================================================
  251. /** Signed 64-bit integer NEON intrinsics.
  252. @tags{DSP}
  253. */
  254. template <>
  255. struct SIMDNativeOps<int64_t>
  256. {
  257. //==============================================================================
  258. typedef int64x2_t vSIMDType;
  259. typedef SIMDFallbackOps<int64_t, vSIMDType> fb;
  260. //==============================================================================
  261. DECLARE_NEON_SIMD_CONST (int64_t, kAllBitsSet);
  262. //==============================================================================
  263. static forcedinline vSIMDType expand (int64_t s) noexcept { return vdupq_n_s64 (s); }
  264. static forcedinline vSIMDType load (const int64_t* a) noexcept { return vld1q_s64 (a); }
  265. static forcedinline void store (vSIMDType value, int64_t* a) noexcept { vst1q_s64 (a, value); }
  266. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s64 (a, b); }
  267. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s64 (a, b); }
  268. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
  269. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s64 (a, b); }
  270. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s64 (a, b); }
  271. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s64 (a, b); }
  272. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s64 (b, a); }
  273. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s64 ((int64_t*) kAllBitsSet)); }
  274. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
  275. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
  276. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
  277. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
  278. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
  279. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
  280. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum (notEqual (a, b)) == 0); }
  281. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
  282. static forcedinline int64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
  283. };
  284. //==============================================================================
  285. /** Unsigned 64-bit integer NEON intrinsics.
  286. @tags{DSP}
  287. */
  288. template <>
  289. struct SIMDNativeOps<uint64_t>
  290. {
  291. //==============================================================================
  292. typedef uint64x2_t vSIMDType;
  293. typedef SIMDFallbackOps<uint64_t, vSIMDType> fb;
  294. //==============================================================================
  295. DECLARE_NEON_SIMD_CONST (uint64_t, kAllBitsSet);
  296. //==============================================================================
  297. static forcedinline vSIMDType expand (uint64_t s) noexcept { return vdupq_n_u64 (s); }
  298. static forcedinline vSIMDType load (const uint64_t* a) noexcept { return vld1q_u64 (a); }
  299. static forcedinline void store (vSIMDType value, uint64_t* a) noexcept { vst1q_u64 (a, value); }
  300. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u64 (a, b); }
  301. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u64 (a, b); }
  302. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
  303. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u64 (a, b); }
  304. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u64 (a, b); }
  305. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u64 (a, b); }
  306. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u64 (b, a); }
  307. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u64 ((uint64_t*) kAllBitsSet)); }
  308. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
  309. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
  310. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
  311. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
  312. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
  313. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
  314. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum (notEqual (a, b)) == 0); }
  315. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
  316. static forcedinline uint64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
  317. };
  318. //==============================================================================
  319. /** Single-precision floating point NEON intrinsics.
  320. @tags{DSP}
  321. */
  322. template <>
  323. struct SIMDNativeOps<float>
  324. {
  325. //==============================================================================
  326. typedef float32x4_t vSIMDType;
  327. typedef uint32x4_t vMaskType;
  328. typedef SIMDFallbackOps<float, vSIMDType> fb;
  329. //==============================================================================
  330. DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
  331. DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
  332. DECLARE_NEON_SIMD_CONST (float, kOne);
  333. //==============================================================================
  334. static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); }
  335. static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); }
  336. static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); }
  337. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); }
  338. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); }
  339. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); }
  340. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); }
  341. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); }
  342. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); }
  343. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); }
  344. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); }
  345. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); }
  346. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); }
  347. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); }
  348. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
  349. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); }
  350. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); }
  351. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum (notEqual (a, b)) == 0); }
  352. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); }
  353. static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); }
  354. static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); }
  355. static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); }
  356. static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); }
  357. //==============================================================================
  358. static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
  359. {
  360. vSIMDType rr_ir = mul (a, dupeven (b));
  361. vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b));
  362. return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit)));
  363. }
  364. static forcedinline float sum (vSIMDType a) noexcept
  365. {
  366. auto rr = vadd_f32 (vget_high_f32 (a), vget_low_f32 (a));
  367. return vget_lane_f32 (vpadd_f32 (rr, rr), 0);
  368. }
  369. };
  370. //==============================================================================
  371. /** Double-precision floating point NEON intrinsics does not exist in NEON
  372. so we need to emulate this.
  373. @tags{DSP}
  374. */
  375. template <>
  376. struct SIMDNativeOps<double>
  377. {
  378. //==============================================================================
  379. typedef struct { double values [2]; } vSIMDType;
  380. typedef SIMDFallbackOps<double, vSIMDType> fb;
  381. static forcedinline vSIMDType expand (double s) noexcept { return fb::expand (s); }
  382. static forcedinline vSIMDType load (const double* a) noexcept { return fb::load (a); }
  383. static forcedinline void store (vSIMDType value, double* a) noexcept { fb::store (value, a); }
  384. static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return fb::add (a, b); }
  385. static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return fb::sub (a, b); }
  386. static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
  387. static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); }
  388. static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); }
  389. static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); }
  390. static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); }
  391. static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); }
  392. static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
  393. static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
  394. static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
  395. static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
  396. static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
  397. static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
  398. static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return fb::allEqual (a, b); }
  399. static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
  400. static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); }
  401. static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); }
  402. static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
  403. };
  404. #endif
  405. } // namespace dsp
  406. } // namespace juce