Audio plugin host https://kx.studio/carla
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1306 lines
57KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2020 - Raw Material Software Limited
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. The code included in this file is provided under the terms of the ISC license
  8. http://www.isc.org/downloads/software-support-policy/isc-license. Permission
  9. To use, copy, modify, and/or distribute this software for any purpose with or
  10. without fee is hereby granted provided that the above copyright notice and
  11. this permission notice appear in all copies.
  12. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  13. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  14. DISCLAIMED.
  15. ==============================================================================
  16. */
  17. namespace juce
  18. {
  19. namespace FloatVectorHelpers
  20. {
  21. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  22. #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  23. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  24. #if JUCE_USE_SSE_INTRINSICS
  25. static bool isAligned (const void* p) noexcept
  26. {
  27. return (((pointer_sized_int) p) & 15) == 0;
  28. }
  29. struct BasicOps32
  30. {
  31. using Type = float;
  32. using ParallelType = __m128;
  33. using IntegerType = __m128;
  34. enum { numParallel = 4 };
  35. // Integer and parallel types are the same for SSE. On neon they have different types
  36. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  37. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  38. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  39. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  40. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  41. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  42. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  43. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  44. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  45. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  46. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  47. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  48. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  49. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  50. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  51. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  52. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  53. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  54. };
  55. struct BasicOps64
  56. {
  57. using Type = double;
  58. using ParallelType = __m128d;
  59. using IntegerType = __m128d;
  60. enum { numParallel = 2 };
  61. // Integer and parallel types are the same for SSE. On neon they have different types
  62. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  63. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  64. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  65. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  66. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  67. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  68. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  69. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  70. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  71. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  72. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  73. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  74. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  75. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  76. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  77. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  78. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  79. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  80. };
  81. #define JUCE_BEGIN_VEC_OP \
  82. using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
  83. { \
  84. const int numLongOps = num / Mode::numParallel;
  85. #define JUCE_FINISH_VEC_OP(normalOp) \
  86. num &= (Mode::numParallel - 1); \
  87. if (num == 0) return; \
  88. } \
  89. for (int i = 0; i < num; ++i) normalOp;
  90. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  91. JUCE_BEGIN_VEC_OP \
  92. setupOp \
  93. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  94. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  95. JUCE_FINISH_VEC_OP (normalOp)
  96. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  97. JUCE_BEGIN_VEC_OP \
  98. setupOp \
  99. if (FloatVectorHelpers::isAligned (dest)) \
  100. { \
  101. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  102. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  103. }\
  104. else \
  105. { \
  106. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  107. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  108. } \
  109. JUCE_FINISH_VEC_OP (normalOp)
  110. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  111. JUCE_BEGIN_VEC_OP \
  112. setupOp \
  113. if (FloatVectorHelpers::isAligned (dest)) \
  114. { \
  115. if (FloatVectorHelpers::isAligned (src1)) \
  116. { \
  117. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  118. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
  119. } \
  120. else \
  121. { \
  122. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  123. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
  124. } \
  125. } \
  126. else \
  127. { \
  128. if (FloatVectorHelpers::isAligned (src1)) \
  129. { \
  130. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
  131. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  132. } \
  133. else \
  134. { \
  135. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
  136. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  137. } \
  138. } \
  139. JUCE_FINISH_VEC_OP (normalOp)
  140. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  141. JUCE_BEGIN_VEC_OP \
  142. setupOp \
  143. if (FloatVectorHelpers::isAligned (dest)) \
  144. { \
  145. if (FloatVectorHelpers::isAligned (src1)) \
  146. { \
  147. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  148. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  149. } \
  150. else \
  151. { \
  152. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  153. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  154. } \
  155. } \
  156. else \
  157. { \
  158. if (FloatVectorHelpers::isAligned (src1)) \
  159. { \
  160. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  161. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  162. } \
  163. else \
  164. { \
  165. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  166. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  167. } \
  168. } \
  169. JUCE_FINISH_VEC_OP (normalOp)
  170. //==============================================================================
  171. #elif JUCE_USE_ARM_NEON
  172. struct BasicOps32
  173. {
  174. using Type = float;
  175. using ParallelType = float32x4_t;
  176. using IntegerType = uint32x4_t;
  177. union signMaskUnion { ParallelType f; IntegerType i; };
  178. enum { numParallel = 4 };
  179. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  180. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  181. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  182. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  183. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  184. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  185. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  186. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  187. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  188. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  189. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  190. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  191. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  192. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  193. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  194. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  195. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  196. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  197. };
  198. struct BasicOps64
  199. {
  200. using Type = double;
  201. using ParallelType = double;
  202. using IntegerType = uint64;
  203. union signMaskUnion { ParallelType f; IntegerType i; };
  204. enum { numParallel = 1 };
  205. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  206. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  207. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  208. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  209. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  210. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  211. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  212. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  213. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  214. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  215. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  216. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  217. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  218. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  219. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  220. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  221. static forcedinline Type max (ParallelType a) noexcept { return a; }
  222. static forcedinline Type min (ParallelType a) noexcept { return a; }
  223. };
  224. #define JUCE_BEGIN_VEC_OP \
  225. using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
  226. if (Mode::numParallel > 1) \
  227. { \
  228. const int numLongOps = num / Mode::numParallel;
  229. #define JUCE_FINISH_VEC_OP(normalOp) \
  230. num &= (Mode::numParallel - 1); \
  231. if (num == 0) return; \
  232. } \
  233. for (int i = 0; i < num; ++i) normalOp;
  234. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  235. JUCE_BEGIN_VEC_OP \
  236. setupOp \
  237. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  238. JUCE_FINISH_VEC_OP (normalOp)
  239. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  240. JUCE_BEGIN_VEC_OP \
  241. setupOp \
  242. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  243. JUCE_FINISH_VEC_OP (normalOp)
  244. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  245. JUCE_BEGIN_VEC_OP \
  246. setupOp \
  247. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  248. JUCE_FINISH_VEC_OP (normalOp)
  249. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  250. JUCE_BEGIN_VEC_OP \
  251. setupOp \
  252. JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  253. JUCE_FINISH_VEC_OP (normalOp)
  254. //==============================================================================
  255. #else
  256. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  257. for (int i = 0; i < num; ++i) normalOp;
  258. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  259. for (int i = 0; i < num; ++i) normalOp;
  260. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  261. for (int i = 0; i < num; ++i) normalOp;
  262. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  263. for (int i = 0; i < num; ++i) normalOp;
  264. #endif
  265. //==============================================================================
  266. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  267. for (int i = 0; i < numLongOps; ++i) \
  268. { \
  269. locals (srcLoad, dstLoad); \
  270. dstStore (dest, vecOp); \
  271. increment; \
  272. }
  273. #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
  274. for (int i = 0; i < numLongOps; ++i) \
  275. { \
  276. locals (src1Load, src2Load); \
  277. dstStore (dest, vecOp); \
  278. increment; \
  279. }
  280. #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
  281. for (int i = 0; i < numLongOps; ++i) \
  282. { \
  283. locals (src1Load, src2Load, dstLoad); \
  284. dstStore (dest, vecOp); \
  285. increment; \
  286. }
  287. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  288. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  289. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  290. #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
  291. #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);
  292. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  293. union signMask32 { float f; uint32 i; };
  294. union signMask64 { double d; uint64 i; };
  295. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  296. template<int typeSize> struct ModeType { using Mode = BasicOps32; };
  297. template<> struct ModeType<8> { using Mode = BasicOps64; };
  298. template <typename Mode>
  299. struct MinMax
  300. {
  301. using Type = typename Mode::Type;
  302. using ParallelType = typename Mode::ParallelType;
  303. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  304. {
  305. int numLongOps = num / Mode::numParallel;
  306. if (numLongOps > 1)
  307. {
  308. ParallelType val;
  309. #if ! JUCE_USE_ARM_NEON
  310. if (isAligned (src))
  311. {
  312. val = Mode::loadA (src);
  313. if (isMinimum)
  314. {
  315. while (--numLongOps > 0)
  316. {
  317. src += Mode::numParallel;
  318. val = Mode::min (val, Mode::loadA (src));
  319. }
  320. }
  321. else
  322. {
  323. while (--numLongOps > 0)
  324. {
  325. src += Mode::numParallel;
  326. val = Mode::max (val, Mode::loadA (src));
  327. }
  328. }
  329. }
  330. else
  331. #endif
  332. {
  333. val = Mode::loadU (src);
  334. if (isMinimum)
  335. {
  336. while (--numLongOps > 0)
  337. {
  338. src += Mode::numParallel;
  339. val = Mode::min (val, Mode::loadU (src));
  340. }
  341. }
  342. else
  343. {
  344. while (--numLongOps > 0)
  345. {
  346. src += Mode::numParallel;
  347. val = Mode::max (val, Mode::loadU (src));
  348. }
  349. }
  350. }
  351. Type result = isMinimum ? Mode::min (val)
  352. : Mode::max (val);
  353. num &= (Mode::numParallel - 1);
  354. src += Mode::numParallel;
  355. for (int i = 0; i < num; ++i)
  356. result = isMinimum ? jmin (result, src[i])
  357. : jmax (result, src[i]);
  358. return result;
  359. }
  360. return isMinimum ? juce::findMinimum (src, num)
  361. : juce::findMaximum (src, num);
  362. }
  363. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  364. {
  365. int numLongOps = num / Mode::numParallel;
  366. if (numLongOps > 1)
  367. {
  368. ParallelType mn, mx;
  369. #if ! JUCE_USE_ARM_NEON
  370. if (isAligned (src))
  371. {
  372. mn = Mode::loadA (src);
  373. mx = mn;
  374. while (--numLongOps > 0)
  375. {
  376. src += Mode::numParallel;
  377. const ParallelType v = Mode::loadA (src);
  378. mn = Mode::min (mn, v);
  379. mx = Mode::max (mx, v);
  380. }
  381. }
  382. else
  383. #endif
  384. {
  385. mn = Mode::loadU (src);
  386. mx = mn;
  387. while (--numLongOps > 0)
  388. {
  389. src += Mode::numParallel;
  390. const ParallelType v = Mode::loadU (src);
  391. mn = Mode::min (mn, v);
  392. mx = Mode::max (mx, v);
  393. }
  394. }
  395. Range<Type> result (Mode::min (mn),
  396. Mode::max (mx));
  397. num &= (Mode::numParallel - 1);
  398. src += Mode::numParallel;
  399. for (int i = 0; i < num; ++i)
  400. result = result.getUnionWith (src[i]);
  401. return result;
  402. }
  403. return Range<Type>::findMinAndMax (src, num);
  404. }
  405. };
  406. #endif
  407. }
  408. //==============================================================================
  409. namespace
  410. {
  411. #if JUCE_USE_VDSP_FRAMEWORK
  412. // This casts away constness to account for slightly different vDSP function signatures
  413. // in OSX 10.8 SDK and below. Can be safely removed once those SDKs are obsolete.
  414. template <typename ValueType>
  415. ValueType* osx108sdkCompatibilityCast (const ValueType* arg) noexcept { return const_cast<ValueType*> (arg); }
  416. #endif
  417. }
  418. //==============================================================================
  419. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  420. {
  421. #if JUCE_USE_VDSP_FRAMEWORK
  422. vDSP_vclr (dest, 1, (size_t) num);
  423. #else
  424. zeromem (dest, (size_t) num * sizeof (float));
  425. #endif
  426. }
  427. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  428. {
  429. #if JUCE_USE_VDSP_FRAMEWORK
  430. vDSP_vclrD (dest, 1, (size_t) num);
  431. #else
  432. zeromem (dest, (size_t) num * sizeof (double));
  433. #endif
  434. }
  435. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  436. {
  437. #if JUCE_USE_VDSP_FRAMEWORK
  438. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  439. #else
  440. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  441. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  442. #endif
  443. }
  444. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  445. {
  446. #if JUCE_USE_VDSP_FRAMEWORK
  447. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  448. #else
  449. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  450. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  451. #endif
  452. }
  453. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  454. {
  455. memcpy (dest, src, (size_t) num * sizeof (float));
  456. }
  457. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  458. {
  459. memcpy (dest, src, (size_t) num * sizeof (double));
  460. }
  461. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  462. {
  463. #if JUCE_USE_VDSP_FRAMEWORK
  464. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  465. #else
  466. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  467. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  468. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  469. #endif
  470. }
  471. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  472. {
  473. #if JUCE_USE_VDSP_FRAMEWORK
  474. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  475. #else
  476. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  477. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  478. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  479. #endif
  480. }
  481. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  482. {
  483. #if JUCE_USE_VDSP_FRAMEWORK
  484. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  485. #else
  486. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  487. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  488. #endif
  489. }
  490. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  491. {
  492. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  493. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  494. }
  495. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
  496. {
  497. #if JUCE_USE_VDSP_FRAMEWORK
  498. vDSP_vsadd (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  499. #else
  500. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  501. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  502. const Mode::ParallelType am = Mode::load1 (amount);)
  503. #endif
  504. }
  505. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
  506. {
  507. #if JUCE_USE_VDSP_FRAMEWORK
  508. vDSP_vsaddD (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  509. #else
  510. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  511. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  512. const Mode::ParallelType am = Mode::load1 (amount);)
  513. #endif
  514. }
  515. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  516. {
  517. #if JUCE_USE_VDSP_FRAMEWORK
  518. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  519. #else
  520. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  521. #endif
  522. }
  523. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  524. {
  525. #if JUCE_USE_VDSP_FRAMEWORK
  526. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  527. #else
  528. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  529. #endif
  530. }
  531. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  532. {
  533. #if JUCE_USE_VDSP_FRAMEWORK
  534. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  535. #else
  536. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  537. #endif
  538. }
  539. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  540. {
  541. #if JUCE_USE_VDSP_FRAMEWORK
  542. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  543. #else
  544. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  545. #endif
  546. }
  547. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  548. {
  549. #if JUCE_USE_VDSP_FRAMEWORK
  550. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  551. #else
  552. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  553. #endif
  554. }
  555. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  556. {
  557. #if JUCE_USE_VDSP_FRAMEWORK
  558. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  559. #else
  560. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  561. #endif
  562. }
  563. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  564. {
  565. #if JUCE_USE_VDSP_FRAMEWORK
  566. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  567. #else
  568. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  569. #endif
  570. }
  571. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  572. {
  573. #if JUCE_USE_VDSP_FRAMEWORK
  574. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  575. #else
  576. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  577. #endif
  578. }
  579. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  580. {
  581. #if JUCE_USE_VDSP_FRAMEWORK
  582. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  583. #else
  584. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  585. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  586. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  587. #endif
  588. }
  589. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  590. {
  591. #if JUCE_USE_VDSP_FRAMEWORK
  592. vDSP_vsmaD (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  593. #else
  594. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  595. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  596. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  597. #endif
  598. }
  599. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  600. {
  601. #if JUCE_USE_VDSP_FRAMEWORK
  602. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  603. #else
  604. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  605. JUCE_LOAD_SRC1_SRC2_DEST,
  606. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  607. #endif
  608. }
// Element-wise multiply-accumulate of two sources, double precision.
// Per-element operation: dest[i] += src1[i] * src2[i].
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmaD (unit strides, dest used
// both as the addend and as the output); otherwise the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST
// macro (defined elsewhere in this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    // The casts strip const from the sources - presumably to match the vDSP prototype; verify.
    vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
                                             JUCE_LOAD_SRC1_SRC2_DEST,
                                             JUCE_INCREMENT_SRC1_SRC2_DEST, )
   #endif
}
// Scalar multiply-subtract, single precision.
// Per-element operation: dest[i] -= src[i] * multiplier.
// There is no vDSP branch here: the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere
// in this file) is always used.
void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
{
    // 'mult' is the multiplier loaded through Mode::load1 for use in the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
                                  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType mult = Mode::load1 (multiplier);)
}
// Scalar multiply-subtract, double precision.
// Per-element operation: dest[i] -= src[i] * multiplier.
// There is no vDSP branch here: the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere
// in this file) is always used.
void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
{
    // 'mult' is the multiplier loaded through Mode::load1 for use in the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
                                  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType mult = Mode::load1 (multiplier);)
}
// Element-wise multiply-subtract of two sources, single precision.
// Per-element operation: dest[i] -= src1[i] * src2[i].
// Always goes through the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro (defined elsewhere
// in this file); no vDSP branch exists for this overload.
void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
{
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
                                             JUCE_LOAD_SRC1_SRC2_DEST,
                                             JUCE_INCREMENT_SRC1_SRC2_DEST, )
}
// Element-wise multiply-subtract of two sources, double precision.
// Per-element operation: dest[i] -= src1[i] * src2[i].
// Always goes through the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro (defined elsewhere
// in this file); no vDSP branch exists for this overload.
void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
{
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
                                             JUCE_LOAD_SRC1_SRC2_DEST,
                                             JUCE_INCREMENT_SRC1_SRC2_DEST, )
}
// In-place element-wise product, single precision.
// Per-element operation: dest[i] *= src[i].
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmul with dest as both an input
// and the output; otherwise the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in
// this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
   #endif
}
// In-place element-wise product, double precision.
// Per-element operation: dest[i] *= src[i].
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmulD with dest as both an input
// and the output; otherwise the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in
// this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
   #endif
}
// Element-wise product of two sources written to a separate destination, single precision.
// Per-element operation: dest[i] = src1[i] * src2[i].
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmul; otherwise the
// JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined elsewhere in this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
   #endif
}
// Element-wise product of two sources written to a separate destination, double precision.
// Per-element operation: dest[i] = src1[i] * src2[i].
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmulD; otherwise the
// JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined elsewhere in this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
   #endif
}
// In-place scaling by a constant, single precision.
// Per-element operation: dest[i] *= multiplier.
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vsmul with dest as both input
// and output; otherwise the JUCE_PERFORM_VEC_OP_DEST macro (defined elsewhere in this file)
// is used.
void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
   #else
    // 'mult' is the multiplier loaded through Mode::load1 for use in the vector expression.
    JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
                              const Mode::ParallelType mult = Mode::load1 (multiplier);)
   #endif
}
// In-place scaling by a constant, double precision.
// Per-element operation: dest[i] *= multiplier.
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vsmulD with dest as both input
// and output; otherwise the JUCE_PERFORM_VEC_OP_DEST macro (defined elsewhere in this file)
// is used.
void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
   #else
    // 'mult' is the multiplier loaded through Mode::load1 for use in the vector expression.
    JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
                              const Mode::ParallelType mult = Mode::load1 (multiplier);)
   #endif
}
// Scaled copy, single precision.
// Per-element operation: dest[i] = src[i] * multiplier.
// Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this
// file); no vDSP branch exists for this overload.
void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
{
    // Only the source is loaded (JUCE_LOAD_SRC) because dest is overwritten, not read.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType mult = Mode::load1 (multiplier);)
}
// Scaled copy, double precision.
// Per-element operation: dest[i] = src[i] * multiplier.
// Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this
// file); no vDSP branch exists for this overload.
void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
{
    // Only the source is loaded (JUCE_LOAD_SRC) because dest is overwritten, not read.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType mult = Mode::load1 (multiplier);)
}
  705. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  706. {
  707. #if JUCE_USE_VDSP_FRAMEWORK
  708. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  709. #else
  710. copyWithMultiply (dest, src, -1.0f, num);
  711. #endif
  712. }
  713. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  714. {
  715. #if JUCE_USE_VDSP_FRAMEWORK
  716. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  717. #else
  718. copyWithMultiply (dest, src, -1.0f, num);
  719. #endif
  720. }
// Absolute value, single precision.
// Per-element operation: dest[i] = std::abs (src[i]).
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vabs; otherwise the vector
// expression ANDs each source value with a mask whose integer form is 0x7fffffff, i.e.
// every bit except the top one.
void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
   #else
    // signMask32 is declared elsewhere - presumably a union exposing the same bits as an
    // integer (.i) and as a float (.f); verify against its definition.
    FloatVectorHelpers::signMask32 signMask;
    signMask.i = 0x7fffffffUL;
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]), Mode::bit_and (s, mask),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType mask = Mode::load1 (signMask.f);)
    // NOTE(review): signMask is only referenced from the macro's setup argument, so this
    // presumably silences an unused-variable warning when the macro drops that argument.
    ignoreUnused (signMask);
   #endif
}
// Absolute value, double precision.
// Per-element operation: dest[i] = std::abs (src[i]).
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vabsD; otherwise the vector
// expression ANDs each source value with a mask whose integer form is 0x7fffffffffffffff,
// i.e. every bit except the top one.
void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
   #else
    // signMask64 is declared elsewhere - presumably a union exposing the same bits as an
    // integer (.i) and as a double (.d); verify against its definition.
    FloatVectorHelpers::signMask64 signMask;
    signMask.i = 0x7fffffffffffffffULL;
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]), Mode::bit_and (s, mask),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType mask = Mode::load1 (signMask.d);)
    // NOTE(review): signMask is only referenced from the macro's setup argument, so this
    // presumably silences an unused-variable warning when the macro drops that argument.
    ignoreUnused (signMask);
   #endif
}
// Converts integers to floats and scales them.
// Per-element operation: dest[i] = (float) src[i] * multiplier.
// The vector expression is written directly with platform intrinsics, so there is one
// macro invocation for ARM NEON builds and one for everything else.
void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
{
   #if JUCE_USE_ARM_NEON
    // NEON: load four ints, convert to float, multiply by the scalar. JUCE_LOAD_NONE is
    // passed because the vector expression performs its own load from src.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
                                  vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
                                  JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
   #else
    // SSE form: an unaligned integer load from src, converted to float and multiplied by
    // 'mult' (the multiplier loaded through Mode::load1).
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
                                  Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast<const __m128i*> (src)))),
                                  JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType mult = Mode::load1 (multiplier);)
   #endif
}
// Minimum against a constant, single precision.
// Per-element operation: dest[i] = jmin (src[i], comp).
// Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this file).
void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
{
    // 'cmp' is the comparison value loaded through Mode::load1 for the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType cmp = Mode::load1 (comp);)
}
// Minimum against a constant, double precision.
// Per-element operation: dest[i] = jmin (src[i], comp).
// Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this file).
void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
{
    // 'cmp' is the comparison value loaded through Mode::load1 for the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType cmp = Mode::load1 (comp);)
}
// Element-wise minimum of two sources, single precision.
// Per-element operation: dest[i] = jmin (src1[i], src2[i]).
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmin; otherwise the
// JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined elsewhere in this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
   #endif
}
// Element-wise minimum of two sources, double precision.
// Per-element operation: dest[i] = jmin (src1[i], src2[i]).
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vminD; otherwise the
// JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined elsewhere in this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
   #endif
}
// Maximum against a constant, single precision.
// Per-element operation: dest[i] = jmax (src[i], comp).
// Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this file).
void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
{
    // 'cmp' is the comparison value loaded through Mode::load1 for the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType cmp = Mode::load1 (comp);)
}
// Maximum against a constant, double precision.
// Per-element operation: dest[i] = jmax (src[i], comp).
// Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this file).
void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
{
    // 'cmp' is the comparison value loaded through Mode::load1 for the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType cmp = Mode::load1 (comp);)
}
// Element-wise maximum of two sources, single precision.
// Per-element operation: dest[i] = jmax (src1[i], src2[i]).
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmax; otherwise the
// JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined elsewhere in this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
   #endif
}
// Element-wise maximum of two sources, double precision.
// Per-element operation: dest[i] = jmax (src1[i], src2[i]).
// With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to vDSP_vmaxD; otherwise the
// JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined elsewhere in this file) is used.
void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
   #else
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
   #endif
}
// Clamps each source value into [low, high], single precision.
// Per-element operation: dest[i] = jmax (jmin (src[i], high), low).
// Asserts that high >= low. With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to
// vDSP_vclip; otherwise the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this
// file) is used.
void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
{
    jassert(high >= low);

   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
   #else
    // 'lo' and 'hi' are the two bounds loaded through Mode::load1 for the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
   #endif
}
// Clamps each source value into [low, high], double precision.
// Per-element operation: dest[i] = jmax (jmin (src[i], high), low).
// Asserts that high >= low. With JUCE_USE_VDSP_FRAMEWORK the call is forwarded to
// vDSP_vclipD; otherwise the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined elsewhere in this
// file) is used.
void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
{
    jassert(high >= low);

   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
   #else
    // 'lo' and 'hi' are the two bounds loaded through Mode::load1 for the vector expression.
    JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
                                  const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
   #endif
}
  838. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  839. {
  840. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  841. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  842. #else
  843. return Range<float>::findMinAndMax (src, num);
  844. #endif
  845. }
  846. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  847. {
  848. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  849. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  850. #else
  851. return Range<double>::findMinAndMax (src, num);
  852. #endif
  853. }
  854. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  855. {
  856. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  857. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  858. #else
  859. return juce::findMinimum (src, num);
  860. #endif
  861. }
  862. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  863. {
  864. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  865. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  866. #else
  867. return juce::findMinimum (src, num);
  868. #endif
  869. }
  870. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  871. {
  872. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  873. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  874. #else
  875. return juce::findMaximum (src, num);
  876. #endif
  877. }
  878. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  879. {
  880. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  881. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  882. #else
  883. return juce::findMaximum (src, num);
  884. #endif
  885. }
  886. intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
  887. {
  888. intptr_t fpsr = 0;
  889. #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
  890. fpsr = static_cast<intptr_t> (_mm_getcsr());
  891. #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
  892. #if defined (__arm64__) || defined (__aarch64__)
  893. asm volatile("mrs %0, fpcr" : "=r" (fpsr));
  894. #elif JUCE_USE_ARM_NEON
  895. asm volatile("vmrs %0, fpscr" : "=r" (fpsr));
  896. #endif
  897. #else
  898. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  899. jassertfalse; // No support for getting the floating point status register for your platform
  900. #endif
  901. #endif
  902. return fpsr;
  903. }
  904. void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister (intptr_t fpsr) noexcept
  905. {
  906. #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
  907. auto fpsr_w = static_cast<uint32_t> (fpsr);
  908. _mm_setcsr (fpsr_w);
  909. #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
  910. #if defined (__arm64__) || defined (__aarch64__)
  911. asm volatile("msr fpcr, %0" : : "ri" (fpsr));
  912. #elif JUCE_USE_ARM_NEON
  913. asm volatile("vmsr fpscr, %0" : : "ri" (fpsr));
  914. #endif
  915. #else
  916. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  917. jassertfalse; // No support for getting the floating point status register for your platform
  918. #endif
  919. ignoreUnused (fpsr);
  920. #endif
  921. }
  922. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  923. {
  924. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
  925. #if JUCE_USE_SSE_INTRINSICS
  926. intptr_t mask = _MM_FLUSH_ZERO_MASK;
  927. #else /*JUCE_USE_ARM_NEON*/
  928. intptr_t mask = (1 << 24 /* FZ */);
  929. #endif
  930. setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0));
  931. #else
  932. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  933. jassertfalse; // No support for flush to zero mode on your platform
  934. #endif
  935. ignoreUnused (shouldEnable);
  936. #endif
  937. }
  938. void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport (bool shouldDisable) noexcept
  939. {
  940. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
  941. #if JUCE_USE_SSE_INTRINSICS
  942. intptr_t mask = 0x8040;
  943. #else /*JUCE_USE_ARM_NEON*/
  944. intptr_t mask = (1 << 24 /* FZ */);
  945. #endif
  946. setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldDisable ? mask : 0));
  947. #else
  948. ignoreUnused (shouldDisable);
  949. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  950. jassertfalse; // No support for disable denormals mode on your platform
  951. #endif
  952. #endif
  953. }
  954. bool JUCE_CALLTYPE FloatVectorOperations::areDenormalsDisabled() noexcept
  955. {
  956. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
  957. #if JUCE_USE_SSE_INTRINSICS
  958. intptr_t mask = 0x8040;
  959. #else /*JUCE_USE_ARM_NEON*/
  960. intptr_t mask = (1 << 24 /* FZ */);
  961. #endif
  962. return ((getFpStatusRegister() & mask) == mask);
  963. #else
  964. return false;
  965. #endif
  966. }
  967. ScopedNoDenormals::ScopedNoDenormals() noexcept
  968. {
  969. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
  970. #if JUCE_USE_SSE_INTRINSICS
  971. intptr_t mask = 0x8040;
  972. #else /*JUCE_USE_ARM_NEON*/
  973. intptr_t mask = (1 << 24 /* FZ */);
  974. #endif
  975. fpsr = FloatVectorOperations::getFpStatusRegister();
  976. FloatVectorOperations::setFpStatusRegister (fpsr | mask);
  977. #endif
  978. }
  979. ScopedNoDenormals::~ScopedNoDenormals() noexcept
  980. {
  981. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
  982. FloatVectorOperations::setFpStatusRegister (fpsr);
  983. #endif
  984. }
  985. //==============================================================================
  986. //==============================================================================
  987. #if JUCE_UNIT_TESTS
  988. class FloatVectorOperationsTests : public UnitTest
  989. {
  990. public:
  991. FloatVectorOperationsTests()
  992. : UnitTest ("FloatVectorOperations", UnitTestCategories::audio)
  993. {}
  994. template <typename ValueType>
  995. struct TestRunner
  996. {
  997. static void runTest (UnitTest& u, Random random)
  998. {
  999. const int range = random.nextBool() ? 500 : 10;
  1000. const int num = random.nextInt (range) + 1;
  1001. HeapBlock<ValueType> buffer1 (num + 16), buffer2 (num + 16);
  1002. HeapBlock<int> buffer3 (num + 16);
  1003. #if JUCE_ARM
  1004. ValueType* const data1 = buffer1;
  1005. ValueType* const data2 = buffer2;
  1006. int* const int1 = buffer3;
  1007. #else
  1008. // These tests deliberately operate on misaligned memory and will be flagged up by
  1009. // checks for undefined behavior!
  1010. ValueType* const data1 = addBytesToPointer (buffer1.get(), random.nextInt (16));
  1011. ValueType* const data2 = addBytesToPointer (buffer2.get(), random.nextInt (16));
  1012. int* const int1 = addBytesToPointer (buffer3.get(), random.nextInt (16));
  1013. #endif
  1014. fillRandomly (random, data1, num);
  1015. fillRandomly (random, data2, num);
  1016. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  1017. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  1018. u.expect (minMax1 == minMax2);
  1019. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  1020. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  1021. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  1022. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  1023. FloatVectorOperations::clear (data1, num);
  1024. u.expect (areAllValuesEqual (data1, num, 0));
  1025. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  1026. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  1027. FloatVectorOperations::add (data1, (ValueType) 2, num);
  1028. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  1029. FloatVectorOperations::copy (data2, data1, num);
  1030. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  1031. FloatVectorOperations::add (data2, data1, num);
  1032. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  1033. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  1034. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  1035. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  1036. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  1037. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  1038. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  1039. FloatVectorOperations::multiply (data1, data2, num);
  1040. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  1041. FloatVectorOperations::negate (data2, data1, num);
  1042. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  1043. FloatVectorOperations::subtract (data1, data2, num);
  1044. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  1045. FloatVectorOperations::abs (data1, data2, num);
  1046. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  1047. FloatVectorOperations::abs (data2, data1, num);
  1048. u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
  1049. fillRandomly (random, int1, num);
  1050. doConversionTest (u, data1, data2, int1, num);
  1051. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  1052. FloatVectorOperations::fill (data2, (ValueType) 3, num);
  1053. FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
  1054. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  1055. }
  1056. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  1057. {
  1058. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  1059. convertFixed (data2, int1, 2.0f, num);
  1060. u.expect (buffersMatch (data1, data2, num));
  1061. }
  1062. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  1063. static void fillRandomly (Random& random, ValueType* d, int num)
  1064. {
  1065. while (--num >= 0)
  1066. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  1067. }
  1068. static void fillRandomly (Random& random, int* d, int num)
  1069. {
  1070. while (--num >= 0)
  1071. *d++ = random.nextInt();
  1072. }
  1073. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  1074. {
  1075. while (--num >= 0)
  1076. *d++ = (float) *s++ * multiplier;
  1077. }
  1078. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  1079. {
  1080. while (--num >= 0)
  1081. if (*d++ != target)
  1082. return false;
  1083. return true;
  1084. }
  1085. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  1086. {
  1087. while (--num >= 0)
  1088. if (! valuesMatch (*d1++, *d2++))
  1089. return false;
  1090. return true;
  1091. }
  1092. static bool valuesMatch (ValueType v1, ValueType v2)
  1093. {
  1094. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  1095. }
  1096. };
  1097. void runTest() override
  1098. {
  1099. beginTest ("FloatVectorOperations");
  1100. for (int i = 1000; --i >= 0;)
  1101. {
  1102. TestRunner<float>::runTest (*this, getRandom());
  1103. TestRunner<double>::runTest (*this, getRandom());
  1104. }
  1105. }
  1106. };
  1107. static FloatVectorOperationsTests vectorOpTests;
  1108. #endif
  1109. } // namespace juce