The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1177 lines
53KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2016 - ROLI Ltd.
  5. Permission is granted to use this software under the terms of the ISC license
  6. http://www.isc.org/downloads/software-support-policy/isc-license/
  7. Permission to use, copy, modify, and/or distribute this software for any
  8. purpose with or without fee is hereby granted, provided that the above
  9. copyright notice and this permission notice appear in all copies.
  10. THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
  11. TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  12. FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
  13. OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
  14. USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
  15. TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  16. OF THIS SOFTWARE.
  17. -----------------------------------------------------------------------------
  18. To release a closed-source product which uses other parts of JUCE not
  19. licensed under the ISC terms, commercial licenses are available: visit
  20. www.juce.com for more information.
  21. ==============================================================================
  22. */
  23. namespace FloatVectorHelpers
  24. {
  25. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  26. #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  27. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  28. #if JUCE_USE_SSE_INTRINSICS
  29. inline static bool isAligned (const void* p) noexcept
  30. {
  31. return (((pointer_sized_int) p) & 15) == 0;
  32. }
  33. struct BasicOps32
  34. {
  35. typedef float Type;
  36. typedef __m128 ParallelType;
  37. typedef __m128 IntegerType;
  38. enum { numParallel = 4 };
  39. // Integer and parallel types are the same for SSE. On neon they have different types
  40. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  41. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  42. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  43. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  44. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  45. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  46. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  47. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  48. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  49. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  50. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  51. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  52. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  53. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  54. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  55. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  56. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  57. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  58. };
  59. struct BasicOps64
  60. {
  61. typedef double Type;
  62. typedef __m128d ParallelType;
  63. typedef __m128d IntegerType;
  64. enum { numParallel = 2 };
  65. // Integer and parallel types are the same for SSE. On neon they have different types
  66. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  67. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  68. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  69. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  70. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  71. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  72. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  73. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  74. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  75. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  76. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  77. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  78. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  79. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  80. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  81. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  82. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  83. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  84. };
  85. #define JUCE_BEGIN_VEC_OP \
  86. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  87. { \
  88. const int numLongOps = num / Mode::numParallel;
  89. #define JUCE_FINISH_VEC_OP(normalOp) \
  90. num &= (Mode::numParallel - 1); \
  91. if (num == 0) return; \
  92. } \
  93. for (int i = 0; i < num; ++i) normalOp;
  94. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  95. JUCE_BEGIN_VEC_OP \
  96. setupOp \
  97. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  98. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  99. JUCE_FINISH_VEC_OP (normalOp)
  100. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  101. JUCE_BEGIN_VEC_OP \
  102. setupOp \
  103. if (FloatVectorHelpers::isAligned (dest)) \
  104. { \
  105. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  106. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  107. }\
  108. else \
  109. { \
  110. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  111. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  112. } \
  113. JUCE_FINISH_VEC_OP (normalOp)
  114. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  115. JUCE_BEGIN_VEC_OP \
  116. setupOp \
  117. if (FloatVectorHelpers::isAligned (dest)) \
  118. { \
  119. if (FloatVectorHelpers::isAligned (src1)) \
  120. { \
  121. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  122. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
  123. } \
  124. else \
  125. { \
  126. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  127. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
  128. } \
  129. } \
  130. else \
  131. { \
  132. if (FloatVectorHelpers::isAligned (src1)) \
  133. { \
  134. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
  135. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  136. } \
  137. else \
  138. { \
  139. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
  140. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  141. } \
  142. } \
  143. JUCE_FINISH_VEC_OP (normalOp)
  144. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  145. JUCE_BEGIN_VEC_OP \
  146. setupOp \
  147. if (FloatVectorHelpers::isAligned (dest)) \
  148. { \
  149. if (FloatVectorHelpers::isAligned (src1)) \
  150. { \
  151. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  152. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  153. } \
  154. else \
  155. { \
  156. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  157. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  158. } \
  159. } \
  160. else \
  161. { \
  162. if (FloatVectorHelpers::isAligned (src1)) \
  163. { \
  164. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  165. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  166. } \
  167. else \
  168. { \
  169. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  170. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  171. } \
  172. } \
  173. JUCE_FINISH_VEC_OP (normalOp)
  174. //==============================================================================
  175. #elif JUCE_USE_ARM_NEON
  176. struct BasicOps32
  177. {
  178. typedef float Type;
  179. typedef float32x4_t ParallelType;
  180. typedef uint32x4_t IntegerType;
  181. union signMaskUnion { ParallelType f; IntegerType i; };
  182. enum { numParallel = 4 };
  183. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  184. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  185. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  186. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  187. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  188. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  189. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  190. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  191. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  192. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  193. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  194. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  195. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  196. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  197. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  198. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  199. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  200. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  201. };
  202. struct BasicOps64
  203. {
  204. typedef double Type;
  205. typedef double ParallelType;
  206. typedef uint64 IntegerType;
  207. union signMaskUnion { ParallelType f; IntegerType i; };
  208. enum { numParallel = 1 };
  209. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  210. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  211. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  212. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  213. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  214. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  215. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  216. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  217. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  218. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  219. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  220. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  221. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  222. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  223. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  224. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  225. static forcedinline Type max (ParallelType a) noexcept { return a; }
  226. static forcedinline Type min (ParallelType a) noexcept { return a; }
  227. };
  228. #define JUCE_BEGIN_VEC_OP \
  229. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  230. if (Mode::numParallel > 1) \
  231. { \
  232. const int numLongOps = num / Mode::numParallel;
  233. #define JUCE_FINISH_VEC_OP(normalOp) \
  234. num &= (Mode::numParallel - 1); \
  235. if (num == 0) return; \
  236. } \
  237. for (int i = 0; i < num; ++i) normalOp;
  238. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  239. JUCE_BEGIN_VEC_OP \
  240. setupOp \
  241. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  242. JUCE_FINISH_VEC_OP (normalOp)
  243. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  244. JUCE_BEGIN_VEC_OP \
  245. setupOp \
  246. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  247. JUCE_FINISH_VEC_OP (normalOp)
  248. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  249. JUCE_BEGIN_VEC_OP \
  250. setupOp \
  251. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  252. JUCE_FINISH_VEC_OP (normalOp)
  253. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  254. JUCE_BEGIN_VEC_OP \
  255. setupOp \
  256. JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  257. JUCE_FINISH_VEC_OP (normalOp)
  258. //==============================================================================
  259. #else
  260. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  261. for (int i = 0; i < num; ++i) normalOp;
  262. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  263. for (int i = 0; i < num; ++i) normalOp;
  264. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  265. for (int i = 0; i < num; ++i) normalOp;
  266. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  267. for (int i = 0; i < num; ++i) normalOp;
  268. #endif
  269. //==============================================================================
  270. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  271. for (int i = 0; i < numLongOps; ++i) \
  272. { \
  273. locals (srcLoad, dstLoad); \
  274. dstStore (dest, vecOp); \
  275. increment; \
  276. }
  277. #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
  278. for (int i = 0; i < numLongOps; ++i) \
  279. { \
  280. locals (src1Load, src2Load); \
  281. dstStore (dest, vecOp); \
  282. increment; \
  283. }
  284. #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
  285. for (int i = 0; i < numLongOps; ++i) \
  286. { \
  287. locals (src1Load, src2Load, dstLoad); \
  288. dstStore (dest, vecOp); \
  289. increment; \
  290. }
  291. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  292. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  293. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  294. #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
  295. #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);
  296. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  297. union signMask32 { float f; uint32 i; };
  298. union signMask64 { double d; uint64 i; };
  299. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  300. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  301. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  302. template <typename Mode>
  303. struct MinMax
  304. {
  305. typedef typename Mode::Type Type;
  306. typedef typename Mode::ParallelType ParallelType;
  307. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  308. {
  309. int numLongOps = num / Mode::numParallel;
  310. if (numLongOps > 1)
  311. {
  312. ParallelType val;
  313. #if ! JUCE_USE_ARM_NEON
  314. if (isAligned (src))
  315. {
  316. val = Mode::loadA (src);
  317. if (isMinimum)
  318. {
  319. while (--numLongOps > 0)
  320. {
  321. src += Mode::numParallel;
  322. val = Mode::min (val, Mode::loadA (src));
  323. }
  324. }
  325. else
  326. {
  327. while (--numLongOps > 0)
  328. {
  329. src += Mode::numParallel;
  330. val = Mode::max (val, Mode::loadA (src));
  331. }
  332. }
  333. }
  334. else
  335. #endif
  336. {
  337. val = Mode::loadU (src);
  338. if (isMinimum)
  339. {
  340. while (--numLongOps > 0)
  341. {
  342. src += Mode::numParallel;
  343. val = Mode::min (val, Mode::loadU (src));
  344. }
  345. }
  346. else
  347. {
  348. while (--numLongOps > 0)
  349. {
  350. src += Mode::numParallel;
  351. val = Mode::max (val, Mode::loadU (src));
  352. }
  353. }
  354. }
  355. Type result = isMinimum ? Mode::min (val)
  356. : Mode::max (val);
  357. num &= (Mode::numParallel - 1);
  358. src += Mode::numParallel;
  359. for (int i = 0; i < num; ++i)
  360. result = isMinimum ? jmin (result, src[i])
  361. : jmax (result, src[i]);
  362. return result;
  363. }
  364. return isMinimum ? juce::findMinimum (src, num)
  365. : juce::findMaximum (src, num);
  366. }
  367. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  368. {
  369. int numLongOps = num / Mode::numParallel;
  370. if (numLongOps > 1)
  371. {
  372. ParallelType mn, mx;
  373. #if ! JUCE_USE_ARM_NEON
  374. if (isAligned (src))
  375. {
  376. mn = Mode::loadA (src);
  377. mx = mn;
  378. while (--numLongOps > 0)
  379. {
  380. src += Mode::numParallel;
  381. const ParallelType v = Mode::loadA (src);
  382. mn = Mode::min (mn, v);
  383. mx = Mode::max (mx, v);
  384. }
  385. }
  386. else
  387. #endif
  388. {
  389. mn = Mode::loadU (src);
  390. mx = mn;
  391. while (--numLongOps > 0)
  392. {
  393. src += Mode::numParallel;
  394. const ParallelType v = Mode::loadU (src);
  395. mn = Mode::min (mn, v);
  396. mx = Mode::max (mx, v);
  397. }
  398. }
  399. Range<Type> result (Mode::min (mn),
  400. Mode::max (mx));
  401. num &= (Mode::numParallel - 1);
  402. src += Mode::numParallel;
  403. for (int i = 0; i < num; ++i)
  404. result = result.getUnionWith (src[i]);
  405. return result;
  406. }
  407. return Range<Type>::findMinAndMax (src, num);
  408. }
  409. };
  410. #endif
  411. }
  412. //==============================================================================
  413. namespace
  414. {
  415. #if JUCE_USE_VDSP_FRAMEWORK
  416. // This casts away constness to account for slightly different vDSP function signatures
  417. // in OSX 10.8 SDK and below. Can be safely removed once those SDKs are obsolete.
  418. template <typename ValueType>
  419. ValueType* osx108sdkCompatibilityCast (const ValueType* arg) noexcept { return const_cast<ValueType*> (arg); }
  420. #endif
  421. }
  422. //==============================================================================
  423. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  424. {
  425. #if JUCE_USE_VDSP_FRAMEWORK
  426. vDSP_vclr (dest, 1, (size_t) num);
  427. #else
  428. zeromem (dest, (size_t) num * sizeof (float));
  429. #endif
  430. }
  431. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  432. {
  433. #if JUCE_USE_VDSP_FRAMEWORK
  434. vDSP_vclrD (dest, 1, (size_t) num);
  435. #else
  436. zeromem (dest, (size_t) num * sizeof (double));
  437. #endif
  438. }
  439. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  440. {
  441. #if JUCE_USE_VDSP_FRAMEWORK
  442. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  443. #else
  444. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  445. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  446. #endif
  447. }
  448. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  449. {
  450. #if JUCE_USE_VDSP_FRAMEWORK
  451. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  452. #else
  453. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  454. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  455. #endif
  456. }
  457. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  458. {
  459. memcpy (dest, src, (size_t) num * sizeof (float));
  460. }
  461. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  462. {
  463. memcpy (dest, src, (size_t) num * sizeof (double));
  464. }
  465. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  466. {
  467. #if JUCE_USE_VDSP_FRAMEWORK
  468. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  469. #else
  470. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  471. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  472. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  473. #endif
  474. }
  475. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  476. {
  477. #if JUCE_USE_VDSP_FRAMEWORK
  478. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  479. #else
  480. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  481. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  482. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  483. #endif
  484. }
  485. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  486. {
  487. #if JUCE_USE_VDSP_FRAMEWORK
  488. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  489. #else
  490. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  491. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  492. #endif
  493. }
  494. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  495. {
  496. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  497. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  498. }
  499. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
  500. {
  501. #if JUCE_USE_VDSP_FRAMEWORK
  502. vDSP_vsadd (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  503. #else
  504. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  505. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  506. const Mode::ParallelType am = Mode::load1 (amount);)
  507. #endif
  508. }
  509. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
  510. {
  511. #if JUCE_USE_VDSP_FRAMEWORK
  512. vDSP_vsaddD (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  513. #else
  514. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  515. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  516. const Mode::ParallelType am = Mode::load1 (amount);)
  517. #endif
  518. }
  519. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  520. {
  521. #if JUCE_USE_VDSP_FRAMEWORK
  522. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  523. #else
  524. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  525. #endif
  526. }
  527. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  528. {
  529. #if JUCE_USE_VDSP_FRAMEWORK
  530. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  531. #else
  532. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  533. #endif
  534. }
  535. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  536. {
  537. #if JUCE_USE_VDSP_FRAMEWORK
  538. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  539. #else
  540. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  541. #endif
  542. }
  543. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  544. {
  545. #if JUCE_USE_VDSP_FRAMEWORK
  546. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  547. #else
  548. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  549. #endif
  550. }
  551. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  552. {
  553. #if JUCE_USE_VDSP_FRAMEWORK
  554. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  555. #else
  556. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  557. #endif
  558. }
  559. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  560. {
  561. #if JUCE_USE_VDSP_FRAMEWORK
  562. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  563. #else
  564. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  565. #endif
  566. }
  567. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  568. {
  569. #if JUCE_USE_VDSP_FRAMEWORK
  570. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  571. #else
  572. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  573. #endif
  574. }
  575. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  576. {
  577. #if JUCE_USE_VDSP_FRAMEWORK
  578. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  579. #else
  580. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  581. #endif
  582. }
  583. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  584. {
  585. #if JUCE_USE_VDSP_FRAMEWORK
  586. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  587. #else
  588. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  589. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  590. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  591. #endif
  592. }
  593. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  594. {
  595. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  596. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  597. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  598. }
  // Multiply-accumulate of two float arrays: the scalar form is
  // dest[i] += src1[i] * src2[i], applied over num values.
  599. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  600. {
  601. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts drop const from src1/src2 (presumably to satisfy the vDSP prototype);
  // dest is passed as both the addend input and the output.
  602. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  603. #else
  604. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  605. JUCE_LOAD_SRC1_SRC2_DEST,
  606. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  607. #endif
  608. }
  // Multiply-accumulate of two double arrays: the scalar form is
  // dest[i] += src1[i] * src2[i], applied over num values.
  609. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  610. {
  611. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts drop const from src1/src2 (presumably to satisfy the vDSP prototype);
  // dest is passed as both the addend input and the output.
  612. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  613. #else
  614. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  615. JUCE_LOAD_SRC1_SRC2_DEST,
  616. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  617. #endif
  618. }
  // In-place element-wise product for floats: the scalar form is dest[i] *= src[i],
  // applied over num values.
  619. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  620. {
  621. #if JUCE_USE_VDSP_FRAMEWORK
  // dest is used as the second input operand as well as the output.
  622. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  623. #else
  624. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  625. #endif
  626. }
  // In-place element-wise product for doubles: the scalar form is dest[i] *= src[i],
  // applied over num values.
  627. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  628. {
  629. #if JUCE_USE_VDSP_FRAMEWORK
  // dest is used as the second input operand as well as the output.
  630. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  631. #else
  632. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  633. #endif
  634. }
  // Element-wise product of two float arrays written to a third: the scalar form is
  // dest[i] = src1[i] * src2[i], applied over num values.
  635. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  636. {
  637. #if JUCE_USE_VDSP_FRAMEWORK
  638. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  639. #else
  640. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  641. #endif
  642. }
  // Element-wise product of two double arrays written to a third: the scalar form is
  // dest[i] = src1[i] * src2[i], applied over num values.
  643. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  644. {
  645. #if JUCE_USE_VDSP_FRAMEWORK
  646. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  647. #else
  648. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  649. #endif
  650. }
  // In-place scaling of a float array: the scalar form is dest[i] *= multiplier,
  // applied over num values.
  651. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  652. {
  653. #if JUCE_USE_VDSP_FRAMEWORK
  // dest is both the input and the output; the scalar is passed by address.
  654. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  655. #else
  // The last macro argument declares 'mult', the multiplier loaded via Mode::load1.
  656. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  657. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  658. #endif
  659. }
  // In-place scaling of a double array: the scalar form is dest[i] *= multiplier,
  // applied over num values.
  660. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  661. {
  662. #if JUCE_USE_VDSP_FRAMEWORK
  // dest is both the input and the output; the scalar is passed by address.
  663. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  664. #else
  // The last macro argument declares 'mult', the multiplier loaded via Mode::load1.
  665. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  666. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  667. #endif
  668. }
  // Scaled copy for floats: the scalar form is dest[i] = src[i] * multiplier,
  // applied over num values. Unlike its neighbours this overload has no vDSP branch.
  669. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  670. {
  // JUCE_LOAD_SRC is used (rather than JUCE_LOAD_SRC_DEST) because the vector
  // expression Mode::mul (mult, s) reads only the source value.
  671. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  672. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  673. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  674. }
  // Scaled copy for doubles: the scalar form is dest[i] = src[i] * multiplier,
  // applied over num values. Unlike its neighbours this overload has no vDSP branch.
  675. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  676. {
  // JUCE_LOAD_SRC is used (rather than JUCE_LOAD_SRC_DEST) because the vector
  // expression Mode::mul (mult, s) reads only the source value.
  677. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  678. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  679. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  680. }
  681. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  682. {
  683. #if JUCE_USE_VDSP_FRAMEWORK
  684. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  685. #else
  686. copyWithMultiply (dest, src, -1.0f, num);
  687. #endif
  688. }
  689. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  690. {
  691. #if JUCE_USE_VDSP_FRAMEWORK
  692. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  693. #else
  694. copyWithMultiply (dest, src, -1.0f, num);
  695. #endif
  696. }
  // Absolute value of num floats: the scalar form is dest[i] = fabsf (src[i]).
  697. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  698. {
  699. #if JUCE_USE_VDSP_FRAMEWORK
  700. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  701. #else
  // 0x7fffffff has every bit set except the top one, so AND-ing a 32-bit float's bit
  // pattern with it clears the sign bit. signMask32 exposes the same storage as an
  // integer (.i) and a float (.f) - presumably a union declared in FloatVectorHelpers.
  702. FloatVectorHelpers::signMask32 signMask;
  703. signMask.i = 0x7fffffffUL;
  704. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  705. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  706. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  // NOTE(review): presumably needed because some expansions of the macro never touch
  // signMask (e.g. a scalar-only build) - confirm against the macro definition.
  707. ignoreUnused (signMask);
  708. #endif
  709. }
  // Absolute value of num doubles: the scalar form is dest[i] = fabs (src[i]).
  710. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  711. {
  712. #if JUCE_USE_VDSP_FRAMEWORK
  713. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  714. #else
  // 0x7fffffffffffffff has every bit set except the top one, so AND-ing a 64-bit
  // double's bit pattern with it clears the sign bit. signMask64 exposes the same storage
  // as an integer (.i) and a double (.d) - presumably a union in FloatVectorHelpers.
  715. FloatVectorHelpers::signMask64 signMask;
  716. signMask.i = 0x7fffffffffffffffULL;
  717. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  718. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  719. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  // NOTE(review): presumably needed because some expansions of the macro never touch
  // signMask (e.g. a scalar-only build) - confirm against the macro definition.
  720. ignoreUnused (signMask);
  721. #endif
  722. }
  // Converts num integers to floats and scales them: the scalar form is
  // dest[i] = src[i] * multiplier.
  723. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  724. {
  // Both branches pass JUCE_LOAD_NONE: the vector expression loads the integers from
  // src itself (the source is int, not float) and converts them before multiplying.
  725. #if JUCE_USE_ARM_NEON
  // NEON: load four ints, convert to float, multiply by the scalar.
  726. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  727. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  728. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  729. #else
  // SSE: unaligned integer load, convert to packed floats, multiply by 'mult'.
  730. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  731. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  732. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  733. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  734. #endif
  735. }
  // Caps each of num floats at a fixed value: the scalar form is
  // dest[i] = jmin (src[i], comp).
  736. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  737. {
  // 'cmp' is comp loaded via Mode::load1, declared by the last macro argument.
  738. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  739. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  740. const Mode::ParallelType cmp = Mode::load1 (comp);)
  741. }
  // Caps each of num doubles at a fixed value: the scalar form is
  // dest[i] = jmin (src[i], comp).
  742. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  743. {
  // 'cmp' is comp loaded via Mode::load1, declared by the last macro argument.
  744. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  745. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  746. const Mode::ParallelType cmp = Mode::load1 (comp);)
  747. }
  // Element-wise minimum of two float arrays: the scalar form is
  // dest[i] = jmin (src1[i], src2[i]), applied over num values.
  748. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  749. {
  750. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts remove const from the inputs before they are handed to vDSP.
  751. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  752. #else
  753. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  754. #endif
  755. }
  // Element-wise minimum of two double arrays: the scalar form is
  // dest[i] = jmin (src1[i], src2[i]), applied over num values.
  756. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  757. {
  758. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts remove const from the inputs before they are handed to vDSP.
  759. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  760. #else
  761. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  762. #endif
  763. }
  // Raises each of num floats to at least a fixed value: the scalar form is
  // dest[i] = jmax (src[i], comp).
  764. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  765. {
  // 'cmp' is comp loaded via Mode::load1, declared by the last macro argument.
  766. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  767. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  768. const Mode::ParallelType cmp = Mode::load1 (comp);)
  769. }
  // Raises each of num doubles to at least a fixed value: the scalar form is
  // dest[i] = jmax (src[i], comp).
  770. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  771. {
  // 'cmp' is comp loaded via Mode::load1, declared by the last macro argument.
  772. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  773. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  774. const Mode::ParallelType cmp = Mode::load1 (comp);)
  775. }
  // Element-wise maximum of two float arrays: the scalar form is
  // dest[i] = jmax (src1[i], src2[i]), applied over num values.
  776. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  777. {
  778. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts remove const from the inputs before they are handed to vDSP.
  779. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  780. #else
  781. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  782. #endif
  783. }
  // Element-wise maximum of two double arrays: the scalar form is
  // dest[i] = jmax (src1[i], src2[i]), applied over num values.
  784. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  785. {
  786. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts remove const from the inputs before they are handed to vDSP.
  787. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  788. #else
  789. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  790. #endif
  791. }
  // Clamps num floats into [low, high]: the scalar form is
  // dest[i] = jmax (jmin (src[i], high), low).
  792. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  793. {
  // The bounds must be ordered; this is only checked by the assertion.
  794. jassert(high >= low);
  795. #if JUCE_USE_VDSP_FRAMEWORK
  // Both bounds are passed by address; the cast removes const from src.
  796. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  797. #else
  // 'lo' and 'hi' are the two bounds loaded via Mode::load1; the upper bound is
  // applied first (min), then the lower bound (max), matching the scalar form.
  798. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  799. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  800. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  801. #endif
  802. }
  // Clamps num doubles into [low, high]: the scalar form is
  // dest[i] = jmax (jmin (src[i], high), low).
  803. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  804. {
  // The bounds must be ordered; this is only checked by the assertion.
  805. jassert(high >= low);
  806. #if JUCE_USE_VDSP_FRAMEWORK
  // Both bounds are passed by address; the cast removes const from src.
  807. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  808. #else
  // 'lo' and 'hi' are the two bounds loaded via Mode::load1; the upper bound is
  // applied first (min), then the lower bound (max), matching the scalar form.
  809. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  810. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  811. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  812. #endif
  813. }
  814. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  815. {
  816. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  817. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  818. #else
  819. return Range<float>::findMinAndMax (src, num);
  820. #endif
  821. }
  822. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  823. {
  824. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  825. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  826. #else
  827. return Range<double>::findMinAndMax (src, num);
  828. #endif
  829. }
  830. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  831. {
  832. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  833. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  834. #else
  835. return juce::findMinimum (src, num);
  836. #endif
  837. }
  838. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  839. {
  840. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  841. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  842. #else
  843. return juce::findMinimum (src, num);
  844. #endif
  845. }
  846. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  847. {
  848. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  849. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  850. #else
  851. return juce::findMaximum (src, num);
  852. #endif
  853. }
  854. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  855. {
  856. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  857. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  858. #else
  859. return juce::findMaximum (src, num);
  860. #endif
  861. }
  862. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  863. {
  864. #if JUCE_USE_SSE_INTRINSICS
  865. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  866. #endif
  867. ignoreUnused (shouldEnable);
  868. }
  869. void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept
  870. {
  871. #if JUCE_USE_SSE_INTRINSICS
  872. const unsigned int mxcsr = _mm_getcsr();
  873. _mm_setcsr (mxcsr | 0x8040); // add the DAZ and FZ bits
  874. #endif
  875. }
  876. //==============================================================================
  877. //==============================================================================
  878. #if JUCE_UNIT_TESTS
  // Unit test for FloatVectorOperations. Each operation is checked either against a
  // scalar reference (Range / juce::findMinimum / juce::findMaximum / convertFixed) or
  // against a known constant produced by a fixed chain of operations.
  879. class FloatVectorOperationsTests : public UnitTest
  880. {
  881. public:
  882. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  // Holds the test body, templated so the same checks run for float and double.
  883. template <typename ValueType>
  884. struct TestRunner
  885. {
  // Runs one randomised pass. Failures are reported through u.expect().
  886. static void runTest (UnitTest& u, Random random)
  887. {
  // The buffer length is num = random.nextInt (range) + 1, with range either 500 or 10.
  888. const int range = random.nextBool() ? 500 : 10;
  889. const int num = random.nextInt (range) + 1;
  // 16 spare elements per buffer leave room for the byte offsets applied below.
  890. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  891. HeapBlock<int> buffer3 ((size_t) num + 16);
  // On ARM the buffers are used as allocated; elsewhere each pointer is shifted by
  // random.nextInt (16) bytes - presumably to exercise unaligned code paths.
  892. #if JUCE_ARM
  893. ValueType* const data1 = buffer1;
  894. ValueType* const data2 = buffer2;
  895. int* const int1 = buffer3;
  896. #else
  897. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  898. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  899. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  900. #endif
  // Min/max searches are compared with their scalar counterparts on random data.
  901. fillRandomly (random, data1, num);
  902. fillRandomly (random, data2, num);
  903. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  904. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  905. u.expect (minMax1 == minMax2);
  906. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  907. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  908. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  909. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  // From here on every step works on what the previous step left in data1/data2, so
  // the expected constants (0, 2, 4, 4, 8, 16, 32, 8, 256, -256, 512, 256, 256) only
  // hold if the statements stay in this order.
  910. FloatVectorOperations::clear (data1, num);
  911. u.expect (areAllValuesEqual (data1, num, 0));
  912. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  913. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  914. FloatVectorOperations::add (data1, (ValueType) 2, num);
  915. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  916. FloatVectorOperations::copy (data2, data1, num);
  917. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  918. FloatVectorOperations::add (data2, data1, num);
  919. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  920. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  921. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  922. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  923. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  924. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  925. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  926. FloatVectorOperations::multiply (data1, data2, num);
  927. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  928. FloatVectorOperations::negate (data2, data1, num);
  929. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  930. FloatVectorOperations::subtract (data1, data2, num);
  931. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  932. FloatVectorOperations::abs (data1, data2, num);
  933. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  934. FloatVectorOperations::abs (data2, data1, num);
  935. u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
  // Integer-to-float conversion; only the float overload of doConversionTest does anything.
  936. fillRandomly (random, int1, num);
  937. doConversionTest (u, data1, data2, int1, num);
  // Three-buffer addWithMultiply with dest aliasing src1: 2 + 2 * 3 = 8.
  938. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  939. FloatVectorOperations::fill (data2, (ValueType) 3, num);
  940. FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
  941. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  942. }
  // Float case: compares convertFixedToFloat with the scalar convertFixed below,
  // using a multiplier of 2.
  943. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  944. {
  945. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  946. convertFixed (data2, int1, 2.0f, num);
  947. u.expect (buffersMatch (data1, data2, num));
  948. }
  // Double case: intentionally empty, so no conversion check runs for double.
  949. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  // Fills d with num values computed as random.nextDouble() * 1000.0.
  950. static void fillRandomly (Random& random, ValueType* d, int num)
  951. {
  952. while (--num >= 0)
  953. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  954. }
  // Fills d with num values from random.nextInt().
  955. static void fillRandomly (Random& random, int* d, int num)
  956. {
  957. while (--num >= 0)
  958. *d++ = random.nextInt();
  959. }
  // Scalar reference for convertFixedToFloat: d[i] = s[i] * multiplier.
  960. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  961. {
  962. while (--num >= 0)
  963. *d++ = *s++ * multiplier;
  964. }
  // True when all num values compare exactly equal (!=) to target.
  965. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  966. {
  967. while (--num >= 0)
  968. if (*d++ != target)
  969. return false;
  970. return true;
  971. }
  // True when every pair of corresponding values satisfies valuesMatch().
  972. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  973. {
  974. while (--num >= 0)
  975. if (! valuesMatch (*d1++, *d2++))
  976. return false;
  977. return true;
  978. }
  // Compares with an absolute tolerance of numeric_limits<ValueType>::epsilon(); the
  // tolerance is not scaled by the magnitude of the values.
  979. static bool valuesMatch (ValueType v1, ValueType v2)
  980. {
  981. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  982. }
  983. };
  // Runs the float and double test bodies 1000 times each, passing getRandom() to each call.
  984. void runTest() override
  985. {
  986. beginTest ("FloatVectorOperations");
  987. for (int i = 1000; --i >= 0;)
  988. {
  989. TestRunner<float>::runTest (*this, getRandom());
  990. TestRunner<double>::runTest (*this, getRandom());
  991. }
  992. }
  993. };
  // Static instance of the test class.
  994. static FloatVectorOperationsTests vectorOpTests;
  995. #endif