Audio plugin host https://kx.studio/carla
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1173 lines
52KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2015 - ROLI Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance helpers, passed as the `increment` argument of the vector loop macros.
  // Each one moves the named pointers forward by 16 / sizeof (*dest) elements, i.e. by
  // 16 bytes' worth of the destination element type (4 floats or 2 doubles).
  // The step for src / src1 / src2 is also computed from sizeof (*dest), so these macros
  // rely on the source arrays having the same element size as dest.
  // Each expansion already ends with ';' and contains several statements, so it must be
  // expanded inside a braced scope.
  19. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  20. #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  21. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. inline static bool isAligned (const void* p) noexcept
  24. {
  25. return (((pointer_sized_int) p) & 15) == 0;
  26. }
  27. struct BasicOps32
  28. {
  29. typedef float Type;
  30. typedef __m128 ParallelType;
  31. typedef __m128 IntegerType;
  32. enum { numParallel = 4 };
  33. // Integer and parallel types are the same for SSE. On neon they have different types
  34. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  35. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  36. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  37. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  38. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  39. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  40. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  41. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  42. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  43. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  44. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  45. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  46. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  47. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  48. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  49. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  50. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  51. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  52. };
  53. struct BasicOps64
  54. {
  55. typedef double Type;
  56. typedef __m128d ParallelType;
  57. typedef __m128d IntegerType;
  58. enum { numParallel = 2 };
  59. // Integer and parallel types are the same for SSE. On neon they have different types
  60. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  61. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  62. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  63. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  64. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  65. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  66. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  67. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  68. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  69. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  70. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  71. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  72. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  73. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  74. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  75. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  76. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  77. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  78. };
  // JUCE_BEGIN_VEC_OP / JUCE_FINISH_VEC_OP bracket the body of a vectorised operation (SSE build).
  // BEGIN selects Mode from sizeof (*dest) via ModeType, opens a scope and computes
  // numLongOps, the number of whole Mode::numParallel-wide steps contained in `num`.
  // FINISH reduces `num` to the left-over element count (num & (numParallel - 1)), returns
  // from the enclosing function if nothing is left, closes the scope opened by BEGIN and
  // then runs `normalOp` once for each remaining index i.
  // FINISH indexes from i = 0, so it relies on the loop's increment step having already
  // advanced dest (and any source pointers) past the elements handled by the vector loop.
  // Both macros expect variables named `dest` and `num` to exist at the expansion site.
  79. #define JUCE_BEGIN_VEC_OP \
  80. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  81. { \
  82. const int numLongOps = num / Mode::numParallel;
  83. #define JUCE_FINISH_VEC_OP(normalOp) \
  84. num &= (Mode::numParallel - 1); \
  85. if (num == 0) return; \
  86. } \
  87. for (int i = 0; i < num; ++i) normalOp;
  // Runs an operation that touches only `dest` (SSE build).
  //   normalOp - scalar statement indexed by i, used for the left-over elements
  //   vecOp    - expression whose value is stored to dest on every vector step
  //   locals   - a JUCE_LOAD_* macro declaring the per-step local values
  //   setupOp  - statement(s) executed once before the loop
  // Uses the aligned load/store pair when dest passes isAligned, the unaligned pair otherwise.
  // `dummy` fills the source-load slot of JUCE_VEC_LOOP; it is only a placeholder token and
  // is never expanded by JUCE_LOAD_NONE or JUCE_LOAD_DEST, which ignore that argument.
  88. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  89. JUCE_BEGIN_VEC_OP \
  90. setupOp \
  91. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  92. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  93. JUCE_FINISH_VEC_OP (normalOp)
  // Runs an operation that reads `src` and writes `dest` (SSE build).
  //   normalOp  - scalar statement indexed by i, used for the left-over elements
  //   vecOp     - expression whose value is stored to dest on every vector step
  //   locals    - a JUCE_LOAD_* macro declaring the per-step local values
  //   increment - statement(s) advancing the pointers after each vector step
  //   setupOp   - statement(s) executed once before the loop
  // The alignment of dest and src is tested separately, giving four loop variants:
  // the dest load and store follow dest's alignment, the src load follows src's alignment.
  94. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  95. JUCE_BEGIN_VEC_OP \
  96. setupOp \
  97. if (FloatVectorHelpers::isAligned (dest)) \
  98. { \
  99. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  100. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  101. }\
  102. else \
  103. { \
  104. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  105. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  106. } \
  107. JUCE_FINISH_VEC_OP (normalOp)
  // Runs an operation that reads `src1` and `src2` and writes `dest` without reading dest
  // first (SSE build). Parameters have the same meaning as in JUCE_PERFORM_VEC_OP_SRC_DEST.
  // The alignment of dest, src1 and src2 is tested independently, giving eight loop variants:
  // the outer branch chooses storeA/storeU from dest, the middle branch chooses the src1 load
  // and the innermost branch chooses the src2 load.
  108. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  109. JUCE_BEGIN_VEC_OP \
  110. setupOp \
  111. if (FloatVectorHelpers::isAligned (dest)) \
  112. { \
  113. if (FloatVectorHelpers::isAligned (src1)) \
  114. { \
  115. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  116. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
  117. } \
  118. else \
  119. { \
  120. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  121. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
  122. } \
  123. } \
  124. else \
  125. { \
  126. if (FloatVectorHelpers::isAligned (src1)) \
  127. { \
  128. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
  129. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  130. } \
  131. else \
  132. { \
  133. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
  134. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  135. } \
  136. } \
  137. JUCE_FINISH_VEC_OP (normalOp)
  // Runs an operation that reads `src1`, `src2` AND the current contents of `dest`, then
  // writes `dest` (SSE build). Parameters have the same meaning as in
  // JUCE_PERFORM_VEC_OP_SRC_DEST; `locals` receives three loaders (src1, src2, dest).
  // The alignment of dest, src1 and src2 is tested independently, giving eight loop variants.
  // The dest load and the dest store always share dest's alignment choice.
  138. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  139. JUCE_BEGIN_VEC_OP \
  140. setupOp \
  141. if (FloatVectorHelpers::isAligned (dest)) \
  142. { \
  143. if (FloatVectorHelpers::isAligned (src1)) \
  144. { \
  145. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  146. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  147. } \
  148. else \
  149. { \
  150. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  151. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  152. } \
  153. } \
  154. else \
  155. { \
  156. if (FloatVectorHelpers::isAligned (src1)) \
  157. { \
  158. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  159. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  160. } \
  161. else \
  162. { \
  163. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  164. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  165. } \
  166. } \
  167. JUCE_FINISH_VEC_OP (normalOp)
  168. //==============================================================================
  169. #elif JUCE_USE_ARM_NEON
  170. struct BasicOps32
  171. {
  172. typedef float Type;
  173. typedef float32x4_t ParallelType;
  174. typedef uint32x4_t IntegerType;
  175. union signMaskUnion { ParallelType f; IntegerType i; };
  176. enum { numParallel = 4 };
  177. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  178. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  179. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  180. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  181. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  182. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  183. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  184. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  185. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  186. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  187. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  188. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  189. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  190. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  191. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  192. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  193. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  194. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  195. };
  196. struct BasicOps64
  197. {
  198. typedef double Type;
  199. typedef double ParallelType;
  200. typedef uint64 IntegerType;
  201. union signMaskUnion { ParallelType f; IntegerType i; };
  202. enum { numParallel = 1 };
  203. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  204. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  205. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  206. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  207. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  208. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  209. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  210. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  211. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  212. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  213. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  214. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  215. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  216. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  217. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  218. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  219. static forcedinline Type max (ParallelType a) noexcept { return a; }
  220. static forcedinline Type min (ParallelType a) noexcept { return a; }
  221. };
  // NEON versions of the JUCE_BEGIN_VEC_OP / JUCE_FINISH_VEC_OP pair.
  // They match the SSE versions except that the whole vector section is wrapped in
  // `if (Mode::numParallel > 1)`, so it is skipped for a Mode with a single lane
  // (the NEON BasicOps64) and all `num` elements are then handled by the trailing scalar loop.
  // FINISH reduces `num` to the left-over count, returns if it is zero, closes the scope
  // opened by BEGIN and runs `normalOp` once for each remaining index i.
  222. #define JUCE_BEGIN_VEC_OP \
  223. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  224. if (Mode::numParallel > 1) \
  225. { \
  226. const int numLongOps = num / Mode::numParallel;
  227. #define JUCE_FINISH_VEC_OP(normalOp) \
  228. num &= (Mode::numParallel - 1); \
  229. if (num == 0) return; \
  230. } \
  231. for (int i = 0; i < num; ++i) normalOp;
  // NEON versions of the four JUCE_PERFORM_VEC_OP_* entry points.
  // Parameters have the same meaning as in the SSE versions (normalOp = scalar fallback,
  // vecOp = value stored each vector step, locals = JUCE_LOAD_* macro, increment = pointer
  // advance, setupOp = one-off setup). No alignment checks are made here: every variant
  // uses Mode::loadU / Mode::storeU.
  232. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  233. JUCE_BEGIN_VEC_OP \
  234. setupOp \
  235. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  236. JUCE_FINISH_VEC_OP (normalOp)
  237. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  238. JUCE_BEGIN_VEC_OP \
  239. setupOp \
  240. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  241. JUCE_FINISH_VEC_OP (normalOp)
  242. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  243. JUCE_BEGIN_VEC_OP \
  244. setupOp \
  245. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  246. JUCE_FINISH_VEC_OP (normalOp)
  247. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  248. JUCE_BEGIN_VEC_OP \
  249. setupOp \
  250. JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  251. JUCE_FINISH_VEC_OP (normalOp)
  252. //==============================================================================
  253. #else
  // Scalar fallbacks used when neither the SSE nor the NEON branch is compiled.
  // Each macro just runs `normalOp` for i in [0, num); the vecOp, locals, increment and
  // setupOp arguments are accepted for signature compatibility and are not expanded.
  254. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  255. for (int i = 0; i < num; ++i) normalOp;
  256. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  257. for (int i = 0; i < num; ++i) normalOp;
  258. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  259. for (int i = 0; i < num; ++i) normalOp;
  260. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  261. for (int i = 0; i < num; ++i) normalOp;
  262. #endif
  263. //==============================================================================
  // Vector loop bodies shared by the JUCE_PERFORM_VEC_OP_* macros.
  // Each one iterates numLongOps times (numLongOps is declared by JUCE_BEGIN_VEC_OP) and, per step:
  //   1. expands `locals` with the supplied load functions to declare the per-step values,
  //   2. stores the value of `vecOp` to dest using `dstStore`,
  //   3. expands `increment` to advance the pointers.
  // The three variants differ only in how many load functions they forward to `locals`:
  // (src, dest), (src1, src2) or (src1, src2, dest).
  264. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  265. for (int i = 0; i < numLongOps; ++i) \
  266. { \
  267. locals (srcLoad, dstLoad); \
  268. dstStore (dest, vecOp); \
  269. increment; \
  270. }
  271. #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
  272. for (int i = 0; i < numLongOps; ++i) \
  273. { \
  274. locals (src1Load, src2Load); \
  275. dstStore (dest, vecOp); \
  276. increment; \
  277. }
  278. #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
  279. for (int i = 0; i < numLongOps; ++i) \
  280. { \
  281. locals (src1Load, src2Load, dstLoad); \
  282. dstStore (dest, vecOp); \
  283. increment; \
  284. }
  // `locals` helpers for the vector loops. Each one declares the const Mode::ParallelType
  // values that the caller's vecOp expression may refer to by name:
  //   d      - current dest vector      s      - current src vector
  //   s1, s2 - current src1 / src2 vectors
  // JUCE_LOAD_NONE declares nothing (for operations that neither read dest nor a source).
  // The load functions to use are passed in by the enclosing loop macro.
  285. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  286. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  287. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  288. #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
  289. #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);
  290. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  291. union signMask32 { float f; uint32 i; };
  292. union signMask64 { double d; uint64 i; };
  293. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  294. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  295. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  296. template <typename Mode>
  297. struct MinMax
  298. {
  299. typedef typename Mode::Type Type;
  300. typedef typename Mode::ParallelType ParallelType;
  301. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  302. {
  303. int numLongOps = num / Mode::numParallel;
  304. if (numLongOps > 1)
  305. {
  306. ParallelType val;
  307. #if ! JUCE_USE_ARM_NEON
  308. if (isAligned (src))
  309. {
  310. val = Mode::loadA (src);
  311. if (isMinimum)
  312. {
  313. while (--numLongOps > 0)
  314. {
  315. src += Mode::numParallel;
  316. val = Mode::min (val, Mode::loadA (src));
  317. }
  318. }
  319. else
  320. {
  321. while (--numLongOps > 0)
  322. {
  323. src += Mode::numParallel;
  324. val = Mode::max (val, Mode::loadA (src));
  325. }
  326. }
  327. }
  328. else
  329. #endif
  330. {
  331. val = Mode::loadU (src);
  332. if (isMinimum)
  333. {
  334. while (--numLongOps > 0)
  335. {
  336. src += Mode::numParallel;
  337. val = Mode::min (val, Mode::loadU (src));
  338. }
  339. }
  340. else
  341. {
  342. while (--numLongOps > 0)
  343. {
  344. src += Mode::numParallel;
  345. val = Mode::max (val, Mode::loadU (src));
  346. }
  347. }
  348. }
  349. Type result = isMinimum ? Mode::min (val)
  350. : Mode::max (val);
  351. num &= (Mode::numParallel - 1);
  352. src += Mode::numParallel;
  353. for (int i = 0; i < num; ++i)
  354. result = isMinimum ? jmin (result, src[i])
  355. : jmax (result, src[i]);
  356. return result;
  357. }
  358. return isMinimum ? juce::findMinimum (src, num)
  359. : juce::findMaximum (src, num);
  360. }
  361. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  362. {
  363. int numLongOps = num / Mode::numParallel;
  364. if (numLongOps > 1)
  365. {
  366. ParallelType mn, mx;
  367. #if ! JUCE_USE_ARM_NEON
  368. if (isAligned (src))
  369. {
  370. mn = Mode::loadA (src);
  371. mx = mn;
  372. while (--numLongOps > 0)
  373. {
  374. src += Mode::numParallel;
  375. const ParallelType v = Mode::loadA (src);
  376. mn = Mode::min (mn, v);
  377. mx = Mode::max (mx, v);
  378. }
  379. }
  380. else
  381. #endif
  382. {
  383. mn = Mode::loadU (src);
  384. mx = mn;
  385. while (--numLongOps > 0)
  386. {
  387. src += Mode::numParallel;
  388. const ParallelType v = Mode::loadU (src);
  389. mn = Mode::min (mn, v);
  390. mx = Mode::max (mx, v);
  391. }
  392. }
  393. Range<Type> result (Mode::min (mn),
  394. Mode::max (mx));
  395. num &= (Mode::numParallel - 1);
  396. src += Mode::numParallel;
  397. for (int i = 0; i < num; ++i)
  398. result = result.getUnionWith (src[i]);
  399. return result;
  400. }
  401. return Range<Type>::findMinAndMax (src, num);
  402. }
  403. };
  404. #endif
  405. }
  406. //==============================================================================
  namespace
  {
     #if JUCE_USE_VDSP_FRAMEWORK
      /** Returns `arg` with its constness removed.

          Needed because some vDSP function signatures differ slightly in the OSX 10.8 SDK
          and below. Can safely be removed once those SDKs are obsolete.
      */
      template <typename ValueType>
      ValueType* osx108sdkCompatibilityCast (const ValueType* arg) noexcept
      {
          ValueType* const withoutConst = const_cast<ValueType*> (arg);
          return withoutConst;
      }
     #endif
  }
  416. //==============================================================================
  417. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  418. {
  419. #if JUCE_USE_VDSP_FRAMEWORK
  420. vDSP_vclr (dest, 1, (size_t) num);
  421. #else
  422. zeromem (dest, (size_t) num * sizeof (float));
  423. #endif
  424. }
  425. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  426. {
  427. #if JUCE_USE_VDSP_FRAMEWORK
  428. vDSP_vclrD (dest, 1, (size_t) num);
  429. #else
  430. zeromem (dest, (size_t) num * sizeof (double));
  431. #endif
  432. }
  433. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  434. {
  435. #if JUCE_USE_VDSP_FRAMEWORK
  436. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  437. #else
  438. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  439. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  440. #endif
  441. }
  442. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  443. {
  444. #if JUCE_USE_VDSP_FRAMEWORK
  445. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  446. #else
  447. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  448. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  449. #endif
  450. }
  451. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  452. {
  453. memcpy (dest, src, (size_t) num * sizeof (float));
  454. }
  455. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  456. {
  457. memcpy (dest, src, (size_t) num * sizeof (double));
  458. }
  459. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  460. {
  461. #if JUCE_USE_VDSP_FRAMEWORK
  462. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  463. #else
  464. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  465. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  466. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  467. #endif
  468. }
  469. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  470. {
  471. #if JUCE_USE_VDSP_FRAMEWORK
  472. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  473. #else
  474. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  475. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  476. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  477. #endif
  478. }
  479. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  480. {
  481. #if JUCE_USE_VDSP_FRAMEWORK
  482. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  483. #else
  484. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  485. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  486. #endif
  487. }
  488. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  489. {
  490. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  491. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  492. }
  493. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
  494. {
  495. #if JUCE_USE_VDSP_FRAMEWORK
  496. vDSP_vsadd (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  497. #else
  498. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  499. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  500. const Mode::ParallelType am = Mode::load1 (amount);)
  501. #endif
  502. }
  503. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
  504. {
  505. #if JUCE_USE_VDSP_FRAMEWORK
  506. vDSP_vsaddD (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  507. #else
  508. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  509. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  510. const Mode::ParallelType am = Mode::load1 (amount);)
  511. #endif
  512. }
  513. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  514. {
  515. #if JUCE_USE_VDSP_FRAMEWORK
  516. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  517. #else
  518. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  519. #endif
  520. }
  521. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  522. {
  523. #if JUCE_USE_VDSP_FRAMEWORK
  524. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  525. #else
  526. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  527. #endif
  528. }
  529. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  530. {
  531. #if JUCE_USE_VDSP_FRAMEWORK
  532. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  533. #else
  534. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  535. #endif
  536. }
  537. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  538. {
  539. #if JUCE_USE_VDSP_FRAMEWORK
  540. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  541. #else
  542. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  543. #endif
  544. }
  545. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  546. {
  547. #if JUCE_USE_VDSP_FRAMEWORK
  548. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  549. #else
  550. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  551. #endif
  552. }
  553. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  554. {
  555. #if JUCE_USE_VDSP_FRAMEWORK
  556. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  557. #else
  558. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  559. #endif
  560. }
  561. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  562. {
  563. #if JUCE_USE_VDSP_FRAMEWORK
  564. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  565. #else
  566. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  567. #endif
  568. }
  569. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  570. {
  571. #if JUCE_USE_VDSP_FRAMEWORK
  572. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  573. #else
  574. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  575. #endif
  576. }
  577. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  578. {
  579. #if JUCE_USE_VDSP_FRAMEWORK
  580. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  581. #else
  582. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  583. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  584. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  585. #endif
  586. }
  587. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  588. {
  589. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  590. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  591. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  592. }
  593. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  594. {
  595. #if JUCE_USE_VDSP_FRAMEWORK
  596. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  597. #else
  598. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  599. JUCE_LOAD_SRC1_SRC2_DEST,
  600. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  601. #endif
  602. }
  603. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  604. {
  605. #if JUCE_USE_VDSP_FRAMEWORK
  606. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  607. #else
  608. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  609. JUCE_LOAD_SRC1_SRC2_DEST,
  610. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  611. #endif
  612. }
  613. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  614. {
  615. #if JUCE_USE_VDSP_FRAMEWORK
  616. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  617. #else
  618. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  619. #endif
  620. }
  621. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  622. {
  623. #if JUCE_USE_VDSP_FRAMEWORK
  624. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  625. #else
  626. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  627. #endif
  628. }
  629. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  630. {
  631. #if JUCE_USE_VDSP_FRAMEWORK
  632. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  633. #else
  634. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  635. #endif
  636. }
  637. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  638. {
  639. #if JUCE_USE_VDSP_FRAMEWORK
  640. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  641. #else
  642. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  643. #endif
  644. }
  645. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  646. {
  647. #if JUCE_USE_VDSP_FRAMEWORK
  648. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  649. #else
  650. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  651. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  652. #endif
  653. }
  654. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  655. {
  656. #if JUCE_USE_VDSP_FRAMEWORK
  657. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  658. #else
  659. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  660. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  661. #endif
  662. }
  663. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  664. {
  665. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  666. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  667. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  668. }
  669. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  670. {
  671. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  672. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  673. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  674. }
  675. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  676. {
  677. #if JUCE_USE_VDSP_FRAMEWORK
  678. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  679. #else
  680. copyWithMultiply (dest, src, -1.0f, num);
  681. #endif
  682. }
  683. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  684. {
  685. #if JUCE_USE_VDSP_FRAMEWORK
  686. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  687. #else
  688. copyWithMultiply (dest, src, -1.0f, num);
  689. #endif
  690. }
  691. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  692. {
  693. #if JUCE_USE_VDSP_FRAMEWORK
  694. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  695. #else
  696. FloatVectorHelpers::signMask32 signMask;
  697. signMask.i = 0x7fffffffUL;
  698. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  699. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  700. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  701. ignoreUnused (signMask);
  702. #endif
  703. }
  704. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  705. {
  706. #if JUCE_USE_VDSP_FRAMEWORK
  707. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  708. #else
  709. FloatVectorHelpers::signMask64 signMask;
  710. signMask.i = 0x7fffffffffffffffULL;
  711. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  712. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  713. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  714. ignoreUnused (signMask);
  715. #endif
  716. }
  717. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  718. {
  719. #if JUCE_USE_ARM_NEON
  720. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  721. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  722. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  723. #else
  724. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  725. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  726. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  727. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  728. #endif
  729. }
  730. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  731. {
  732. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  733. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  734. const Mode::ParallelType cmp = Mode::load1 (comp);)
  735. }
  736. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  737. {
  738. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  739. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  740. const Mode::ParallelType cmp = Mode::load1 (comp);)
  741. }
  742. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  743. {
  744. #if JUCE_USE_VDSP_FRAMEWORK
  745. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  746. #else
  747. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  748. #endif
  749. }
  750. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  751. {
  752. #if JUCE_USE_VDSP_FRAMEWORK
  753. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  754. #else
  755. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  756. #endif
  757. }
  758. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  759. {
  760. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  761. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  762. const Mode::ParallelType cmp = Mode::load1 (comp);)
  763. }
  764. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  765. {
  766. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  767. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  768. const Mode::ParallelType cmp = Mode::load1 (comp);)
  769. }
  770. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  771. {
  772. #if JUCE_USE_VDSP_FRAMEWORK
  773. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  774. #else
  775. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  776. #endif
  777. }
  778. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  779. {
  780. #if JUCE_USE_VDSP_FRAMEWORK
  781. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  782. #else
  783. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  784. #endif
  785. }
  786. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  787. {
  788. jassert(high >= low);
  789. #if JUCE_USE_VDSP_FRAMEWORK
  790. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  791. #else
  792. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  793. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  794. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  795. #endif
  796. }
  797. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  798. {
  799. jassert(high >= low);
  800. #if JUCE_USE_VDSP_FRAMEWORK
  801. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  802. #else
  803. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  804. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  805. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  806. #endif
  807. }
  808. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  809. {
  810. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  811. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  812. #else
  813. return Range<float>::findMinAndMax (src, num);
  814. #endif
  815. }
  816. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  817. {
  818. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  819. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  820. #else
  821. return Range<double>::findMinAndMax (src, num);
  822. #endif
  823. }
  824. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  825. {
  826. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  827. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  828. #else
  829. return juce::findMinimum (src, num);
  830. #endif
  831. }
  832. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  833. {
  834. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  835. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  836. #else
  837. return juce::findMinimum (src, num);
  838. #endif
  839. }
  840. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  841. {
  842. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  843. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  844. #else
  845. return juce::findMaximum (src, num);
  846. #endif
  847. }
  848. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  849. {
  850. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  851. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  852. #else
  853. return juce::findMaximum (src, num);
  854. #endif
  855. }
  856. #if ! JUCE_MINGW
  857. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  858. {
  859. #if JUCE_USE_SSE_INTRINSICS
  860. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  861. #endif
  862. ignoreUnused (shouldEnable);
  863. }
  864. void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept
  865. {
  866. #if JUCE_USE_SSE_INTRINSICS
  867. const unsigned int mxcsr = _mm_getcsr();
  868. _mm_setcsr (mxcsr | 0x8040); // add the DAZ and FZ bits
  869. #endif
  870. }
  871. #endif
  872. //==============================================================================
  873. //==============================================================================
  874. #if JUCE_UNIT_TESTS
  875. class FloatVectorOperationsTests : public UnitTest
  876. {
  877. public:
  878. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  879. template <typename ValueType>
  880. struct TestRunner
  881. {
  882. static void runTest (UnitTest& u, Random random)
  883. {
  884. const int range = random.nextBool() ? 500 : 10;
  885. const int num = random.nextInt (range) + 1;
  886. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  887. HeapBlock<int> buffer3 ((size_t) num + 16);
  888. #if JUCE_ARM
  889. ValueType* const data1 = buffer1;
  890. ValueType* const data2 = buffer2;
  891. int* const int1 = buffer3;
  892. #else
  893. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  894. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  895. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  896. #endif
  897. fillRandomly (random, data1, num);
  898. fillRandomly (random, data2, num);
  899. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  900. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  901. u.expect (minMax1 == minMax2);
  902. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  903. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  904. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  905. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  906. FloatVectorOperations::clear (data1, num);
  907. u.expect (areAllValuesEqual (data1, num, 0));
  908. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  909. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  910. FloatVectorOperations::add (data1, (ValueType) 2, num);
  911. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  912. FloatVectorOperations::copy (data2, data1, num);
  913. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  914. FloatVectorOperations::add (data2, data1, num);
  915. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  916. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  917. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  918. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  919. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  920. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  921. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  922. FloatVectorOperations::multiply (data1, data2, num);
  923. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  924. FloatVectorOperations::negate (data2, data1, num);
  925. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  926. FloatVectorOperations::subtract (data1, data2, num);
  927. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  928. FloatVectorOperations::abs (data1, data2, num);
  929. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  930. FloatVectorOperations::abs (data2, data1, num);
  931. u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
  932. fillRandomly (random, int1, num);
  933. doConversionTest (u, data1, data2, int1, num);
  934. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  935. FloatVectorOperations::fill (data2, (ValueType) 3, num);
  936. FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
  937. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  938. }
  939. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  940. {
  941. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  942. convertFixed (data2, int1, 2.0f, num);
  943. u.expect (buffersMatch (data1, data2, num));
  944. }
  945. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  946. static void fillRandomly (Random& random, ValueType* d, int num)
  947. {
  948. while (--num >= 0)
  949. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  950. }
  951. static void fillRandomly (Random& random, int* d, int num)
  952. {
  953. while (--num >= 0)
  954. *d++ = random.nextInt();
  955. }
  956. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  957. {
  958. while (--num >= 0)
  959. *d++ = *s++ * multiplier;
  960. }
  961. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  962. {
  963. while (--num >= 0)
  964. if (*d++ != target)
  965. return false;
  966. return true;
  967. }
  968. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  969. {
  970. while (--num >= 0)
  971. if (! valuesMatch (*d1++, *d2++))
  972. return false;
  973. return true;
  974. }
  975. static bool valuesMatch (ValueType v1, ValueType v2)
  976. {
  977. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  978. }
  979. };
  980. void runTest() override
  981. {
  982. beginTest ("FloatVectorOperations");
  983. for (int i = 1000; --i >= 0;)
  984. {
  985. TestRunner<float>::runTest (*this, getRandom());
  986. TestRunner<double>::runTest (*this, getRandom());
  987. }
  988. }
  989. };
  990. static FloatVectorOperationsTests vectorOpTests;
  991. #endif