The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1125 lines
50KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance snippets passed as the `increment` argument of the vector loops.
  // Each one moves the named pointers forward by (16 / sizeof (*dest)) elements,
  // i.e. 16 bytes' worth of data: 4 elements for float, 2 for double.
  // Note that the step for every pointer is derived from the element size of `dest`.
  19. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  20. #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  21. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. static bool sse2Present = false;
  24. static bool isSSE2Available() noexcept
  25. {
  26. if (sse2Present)
  27. return true;
  28. sse2Present = SystemStats::hasSSE2();
  29. return sse2Present;
  30. }
  31. inline static bool isAligned (const void* p) noexcept
  32. {
  33. return (((pointer_sized_int) p) & 15) == 0;
  34. }
  35. struct BasicOps32
  36. {
  37. typedef float Type;
  38. typedef __m128 ParallelType;
  39. typedef __m128 IntegerType;
  40. enum { numParallel = 4 };
  41. // Integer and parallel types are the same for SSE. On neon they have different types
  42. static forcedinline IntegerType toint (ParallelType v) { return v; }
  43. static forcedinline ParallelType toflt (IntegerType v) { return v; }
  44. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  45. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  46. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  47. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  48. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  49. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  50. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  51. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  52. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  53. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  54. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  55. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  56. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  57. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  58. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  59. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  60. };
  61. struct BasicOps64
  62. {
  63. typedef double Type;
  64. typedef __m128d ParallelType;
  65. typedef __m128d IntegerType;
  66. enum { numParallel = 2 };
  67. // Integer and parallel types are the same for SSE. On neon they have different types
  68. static forcedinline IntegerType toint (ParallelType v) { return v; }
  69. static forcedinline ParallelType toflt (IntegerType v) { return v; }
  70. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  71. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  72. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  73. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  74. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  75. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  76. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  77. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  78. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  79. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  80. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  81. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  82. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  83. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  84. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  85. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  86. };
  // SSE flavour. Opens the vectorised section of an operation: selects Mode from
  // the element size of `dest` and, if SSE2 is available, opens a block in which
  // numLongOps (the number of whole vectors in `num`) is defined.
  // The block is deliberately left open; JUCE_FINISH_VEC_OP closes it.
  87. #define JUCE_BEGIN_VEC_OP \
  88. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  89. if (FloatVectorHelpers::isSSE2Available()) \
  90. { \
  91. const int numLongOps = num / Mode::numParallel;
  // Closes the block opened by JUCE_BEGIN_VEC_OP. Inside it, `num` is reduced to
  // the count of left-over elements and the enclosing function returns if there
  // are none. The scalar loop that follows runs normalOp for each remaining
  // index; when the vector block was skipped it therefore covers all `num` elements.
  92. #define JUCE_FINISH_VEC_OP(normalOp) \
  93. num &= (Mode::numParallel - 1); \
  94. if (num == 0) return; \
  95. } \
  96. for (int i = 0; i < num; ++i) normalOp;
  // SSE flavour, destination-only operation. Runs setupOp once, then the vector
  // loop with aligned load/store when `dest` is 16-byte aligned and unaligned
  // ones otherwise; normalOp handles whatever the vector loop did not.
  // `dummy` fills the unused source-load slot of JUCE_VEC_LOOP.
  97. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  98. JUCE_BEGIN_VEC_OP \
  99. setupOp \
  100. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  101. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  102. JUCE_FINISH_VEC_OP (normalOp)
  // SSE flavour, one source plus destination. The four branches cover every
  // combination of `dest` and `src` alignment, choosing the aligned or unaligned
  // load/store for each pointer independently.
  103. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  104. JUCE_BEGIN_VEC_OP \
  105. setupOp \
  106. if (FloatVectorHelpers::isAligned (dest)) \
  107. { \
  108. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  109. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  110. }\
  111. else \
  112. { \
  113. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  114. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  115. } \
  116. JUCE_FINISH_VEC_OP (normalOp)
  // SSE flavour, two sources plus destination. Instead of branching over every
  // alignment combination, the aligned or unaligned load/store function is bound
  // to a function reference per pointer and those references are passed to the loop.
  117. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  118. JUCE_BEGIN_VEC_OP \
  119. setupOp \
  120. { \
  121. Mode::ParallelType (&loadSrc1) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src1) ? Mode::loadA : Mode::loadU; \
  122. Mode::ParallelType (&loadSrc2) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src2) ? Mode::loadA : Mode::loadU; \
  123. void (&storeDst) (Mode::Type* dest, Mode::ParallelType a) = FloatVectorHelpers::isAligned (dest) ? Mode::storeA : Mode::storeU; \
  124. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, loadSrc1, loadSrc2, storeDst, locals, increment); \
  125. } \
  126. JUCE_FINISH_VEC_OP (normalOp)
  // As above, but the loop also loads the current contents of `dest` (via loadDst)
  // so that vecOp can combine them with the two sources.
  127. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  128. JUCE_BEGIN_VEC_OP \
  129. setupOp \
  130. { \
  131. Mode::ParallelType (&loadSrc1) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src1) ? Mode::loadA : Mode::loadU; \
  132. Mode::ParallelType (&loadSrc2) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src2) ? Mode::loadA : Mode::loadU; \
  133. Mode::ParallelType (&loadDst) (const Mode::Type* v) = FloatVectorHelpers::isAligned (dest) ? Mode::loadA : Mode::loadU; \
  134. void (&storeDst) (Mode::Type* dest, Mode::ParallelType a) = FloatVectorHelpers::isAligned (dest) ? Mode::storeA : Mode::storeU; \
  135. JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, loadSrc1, loadSrc2, loadDst, storeDst, locals, increment); \
  136. } \
  137. JUCE_FINISH_VEC_OP (normalOp)
  138. //==============================================================================
  139. #elif JUCE_USE_ARM_NEON
  140. struct BasicOps32
  141. {
  142. typedef float Type;
  143. typedef float32x4_t ParallelType;
  144. typedef uint32x4 IntegerType;
  145. enum { numParallel = 4 };
  146. static forcedinline IntegerType toint (ParallelType v) { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  147. static forcedinline ParallelType toflt (IntegerType v) { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  148. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  149. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  150. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  151. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  152. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  153. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  154. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  155. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  156. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  157. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  158. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  159. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  160. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  161. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  162. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  163. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  164. };
  165. struct BasicOps64
  166. {
  167. typedef double Type;
  168. typedef double ParallelType;
  169. typedef uint64 IntegerType;
  170. enum { numParallel = 1 };
  171. static forcedinline IntegerType toint (ParallelType v) { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  172. static forcedinline ParallelType toflt (IntegerType v) { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  173. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  174. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  175. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  176. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  177. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  178. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  179. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  180. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  181. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  182. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  183. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  184. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  185. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  186. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  187. static forcedinline Type max (ParallelType a) noexcept { return a; }
  188. static forcedinline Type min (ParallelType a) noexcept { return a; }
  189. };
  // NEON flavour. Opens the vectorised section: selects Mode from the element
  // size of `dest` and enters the block only when that Mode processes more than
  // one element at a time (so the single-lane BasicOps64 skips it). The block is
  // deliberately left open; JUCE_FINISH_VEC_OP closes it.
  190. #define JUCE_BEGIN_VEC_OP \
  191. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  192. if (Mode::numParallel > 1) \
  193. { \
  194. const int numLongOps = num / Mode::numParallel;
  // Closes the block opened by JUCE_BEGIN_VEC_OP. Inside it, `num` is reduced to
  // the count of left-over elements and the enclosing function returns if there
  // are none. The scalar loop that follows runs normalOp for each remaining
  // index; when the vector block was skipped it covers all `num` elements.
  195. #define JUCE_FINISH_VEC_OP(normalOp) \
  196. num &= (Mode::numParallel - 1); \
  197. if (num == 0) return; \
  198. } \
  199. for (int i = 0; i < num; ++i) normalOp;
  // NEON flavour of the four operation drivers. Unlike the SSE versions there is
  // no alignment check: every variant passes Mode::loadU / Mode::storeU to the loop.
  // Destination-only operation (`dummy` fills the unused source-load slot).
  200. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  201. JUCE_BEGIN_VEC_OP \
  202. setupOp \
  203. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  204. JUCE_FINISH_VEC_OP (normalOp)
  // One source plus destination.
  205. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  206. JUCE_BEGIN_VEC_OP \
  207. setupOp \
  208. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  209. JUCE_FINISH_VEC_OP (normalOp)
  // Two sources plus destination.
  210. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  211. JUCE_BEGIN_VEC_OP \
  212. setupOp \
  213. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  214. JUCE_FINISH_VEC_OP (normalOp)
  // Two sources plus destination, where the current destination contents are loaded too.
  215. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  216. JUCE_BEGIN_VEC_OP \
  217. setupOp \
  218. JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  219. JUCE_FINISH_VEC_OP (normalOp)
  220. //==============================================================================
  221. #else
  // Fallback used when neither SSE nor NEON is enabled: each driver ignores its
  // vector-related arguments and simply runs normalOp for every index in [0, num).
  222. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  223. for (int i = 0; i < num; ++i) normalOp;
  224. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  225. for (int i = 0; i < num; ++i) normalOp;
  226. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  227. for (int i = 0; i < num; ++i) normalOp;
  228. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  229. for (int i = 0; i < num; ++i) normalOp;
  230. #endif
  231. //==============================================================================
  // Vector loop bodies shared by the SSE and NEON drivers. Each runs numLongOps
  // iterations; one iteration declares the vector locals through `locals`,
  // stores the value of vecOp to `dest` through dstStore, then executes `increment`.
  // Single-source (or destination-only) form: `locals` receives the source and
  // destination load functions.
  232. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  233. for (int i = 0; i < numLongOps; ++i) \
  234. { \
  235. locals (srcLoad, dstLoad); \
  236. dstStore (dest, vecOp); \
  237. increment; \
  238. }
  // Two-source form: `locals` receives the load functions for src1 and src2.
  239. #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
  240. for (int i = 0; i < numLongOps; ++i) \
  241. { \
  242. locals (src1Load, src2Load); \
  243. dstStore (dest, vecOp); \
  244. increment; \
  245. }
  // Two-source form that additionally hands `locals` a load function for the destination.
  246. #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
  247. for (int i = 0; i < numLongOps; ++i) \
  248. { \
  249. locals (src1Load, src2Load, dstLoad); \
  250. dstStore (dest, vecOp); \
  251. increment; \
  252. }
  // `locals` arguments for the vector loops. Each expands to the declarations of
  // the per-iteration vector values that a vecOp expression may refer to:
  // d (loaded from dest), s (from src), s1 and s2 (from src1 and src2).
  // JUCE_LOAD_NONE declares nothing, for operations that only write to dest.
  253. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  254. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  255. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  256. #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
  257. #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);
  258. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  259. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  260. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  261. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  262. template <typename Mode>
  263. struct MinMax
  264. {
  265. typedef typename Mode::Type Type;
  266. typedef typename Mode::ParallelType ParallelType;
  267. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  268. {
  269. int numLongOps = num / Mode::numParallel;
  270. #if JUCE_USE_SSE_INTRINSICS
  271. if (numLongOps > 1 && isSSE2Available())
  272. #else
  273. if (numLongOps > 1)
  274. #endif
  275. {
  276. ParallelType val;
  277. #if ! JUCE_USE_ARM_NEON
  278. if (isAligned (src))
  279. {
  280. val = Mode::loadA (src);
  281. if (isMinimum)
  282. {
  283. while (--numLongOps > 0)
  284. {
  285. src += Mode::numParallel;
  286. val = Mode::min (val, Mode::loadA (src));
  287. }
  288. }
  289. else
  290. {
  291. while (--numLongOps > 0)
  292. {
  293. src += Mode::numParallel;
  294. val = Mode::max (val, Mode::loadA (src));
  295. }
  296. }
  297. }
  298. else
  299. #endif
  300. {
  301. val = Mode::loadU (src);
  302. if (isMinimum)
  303. {
  304. while (--numLongOps > 0)
  305. {
  306. src += Mode::numParallel;
  307. val = Mode::min (val, Mode::loadU (src));
  308. }
  309. }
  310. else
  311. {
  312. while (--numLongOps > 0)
  313. {
  314. src += Mode::numParallel;
  315. val = Mode::max (val, Mode::loadU (src));
  316. }
  317. }
  318. }
  319. Type result = isMinimum ? Mode::min (val)
  320. : Mode::max (val);
  321. num &= (Mode::numParallel - 1);
  322. src += Mode::numParallel;
  323. for (int i = 0; i < num; ++i)
  324. result = isMinimum ? jmin (result, src[i])
  325. : jmax (result, src[i]);
  326. return result;
  327. }
  328. return isMinimum ? juce::findMinimum (src, num)
  329. : juce::findMaximum (src, num);
  330. }
  331. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  332. {
  333. int numLongOps = num / Mode::numParallel;
  334. #if JUCE_USE_SSE_INTRINSICS
  335. if (numLongOps > 1 && isSSE2Available())
  336. #else
  337. if (numLongOps > 1)
  338. #endif
  339. {
  340. ParallelType mn, mx;
  341. #if ! JUCE_USE_ARM_NEON
  342. if (isAligned (src))
  343. {
  344. mn = Mode::loadA (src);
  345. mx = mn;
  346. while (--numLongOps > 0)
  347. {
  348. src += Mode::numParallel;
  349. const ParallelType v = Mode::loadA (src);
  350. mn = Mode::min (mn, v);
  351. mx = Mode::max (mx, v);
  352. }
  353. }
  354. else
  355. #endif
  356. {
  357. mn = Mode::loadU (src);
  358. mx = mn;
  359. while (--numLongOps > 0)
  360. {
  361. src += Mode::numParallel;
  362. const ParallelType v = Mode::loadU (src);
  363. mn = Mode::min (mn, v);
  364. mx = Mode::max (mx, v);
  365. }
  366. }
  367. Range<Type> result (Mode::min (mn),
  368. Mode::max (mx));
  369. num &= (Mode::numParallel - 1);
  370. src += Mode::numParallel;
  371. for (int i = 0; i < num; ++i)
  372. result = result.getUnionWith (src[i]);
  373. return result;
  374. }
  375. return Range<Type>::findMinAndMax (src, num);
  376. }
  377. };
  378. #endif
  379. }
  380. //==============================================================================
  381. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  382. {
  383. #if JUCE_USE_VDSP_FRAMEWORK
  384. vDSP_vclr (dest, 1, (size_t) num);
  385. #else
  386. zeromem (dest, num * sizeof (float));
  387. #endif
  388. }
  389. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  390. {
  391. #if JUCE_USE_VDSP_FRAMEWORK
  392. vDSP_vclrD (dest, 1, (size_t) num);
  393. #else
  394. zeromem (dest, num * sizeof (double));
  395. #endif
  396. }
  397. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  398. {
  399. #if JUCE_USE_VDSP_FRAMEWORK
  400. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  401. #else
  402. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  403. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  404. #endif
  405. }
  406. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  407. {
  408. #if JUCE_USE_VDSP_FRAMEWORK
  409. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  410. #else
  411. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  412. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  413. #endif
  414. }
  415. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  416. {
  417. memcpy (dest, src, (size_t) num * sizeof (float));
  418. }
  419. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  420. {
  421. memcpy (dest, src, (size_t) num * sizeof (double));
  422. }
  423. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  424. {
  425. #if JUCE_USE_VDSP_FRAMEWORK
  426. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  427. #else
  428. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  429. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  430. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  431. #endif
  432. }
  433. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  434. {
  435. #if JUCE_USE_VDSP_FRAMEWORK
  436. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  437. #else
  438. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  439. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  440. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  441. #endif
  442. }
  443. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  444. {
  445. #if JUCE_USE_VDSP_FRAMEWORK
  446. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  447. #else
  448. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  449. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  450. #endif
  451. }
  452. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  453. {
  454. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  455. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  456. }
  457. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float* src, float amount, int num) noexcept
  458. {
  459. #if JUCE_USE_VDSP_FRAMEWORK
  460. vDSP_vsadd (src, 1, &amount, dest, 1, (vDSP_Length) num);
  461. #else
  462. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  463. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  464. const Mode::ParallelType am = Mode::load1 (amount);)
  465. #endif
  466. }
  467. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double* src, double amount, int num) noexcept
  468. {
  469. #if JUCE_USE_VDSP_FRAMEWORK
  470. vDSP_vsaddD (src, 1, &amount, dest, 1, (vDSP_Length) num);
  471. #else
  472. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  473. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  474. const Mode::ParallelType am = Mode::load1 (amount);)
  475. #endif
  476. }
  477. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  478. {
  479. #if JUCE_USE_VDSP_FRAMEWORK
  480. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  481. #else
  482. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  483. #endif
  484. }
  485. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  486. {
  487. #if JUCE_USE_VDSP_FRAMEWORK
  488. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  489. #else
  490. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  491. #endif
  492. }
  493. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  494. {
  495. #if JUCE_USE_VDSP_FRAMEWORK
  496. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  497. #else
  498. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  499. #endif
  500. }
  501. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  502. {
  503. #if JUCE_USE_VDSP_FRAMEWORK
  504. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  505. #else
  506. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  507. #endif
  508. }
  509. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  510. {
  511. #if JUCE_USE_VDSP_FRAMEWORK
  512. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  513. #else
  514. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  515. #endif
  516. }
  517. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  518. {
  519. #if JUCE_USE_VDSP_FRAMEWORK
  520. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  521. #else
  522. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  523. #endif
  524. }
  525. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  526. {
  527. #if JUCE_USE_VDSP_FRAMEWORK
  528. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  529. #else
  530. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  531. #endif
  532. }
  533. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  534. {
  535. #if JUCE_USE_VDSP_FRAMEWORK
  536. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  537. #else
  538. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  539. #endif
  540. }
  541. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  542. {
  543. #if JUCE_USE_VDSP_FRAMEWORK
  544. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  545. #else
  546. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  547. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  548. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  549. #endif
  550. }
  551. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  552. {
  553. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  554. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  555. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  556. }
  557. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  558. {
  559. #if JUCE_USE_VDSP_FRAMEWORK
  560. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  561. #else
  562. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  563. JUCE_LOAD_SRC1_SRC2_DEST,
  564. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  565. #endif
  566. }
  567. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  568. {
  569. #if JUCE_USE_VDSP_FRAMEWORK
  570. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  571. #else
  572. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  573. JUCE_LOAD_SRC1_SRC2_DEST,
  574. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  575. #endif
  576. }
  577. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  578. {
  579. #if JUCE_USE_VDSP_FRAMEWORK
  580. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  581. #else
  582. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  583. #endif
  584. }
  585. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  586. {
  587. #if JUCE_USE_VDSP_FRAMEWORK
  588. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  589. #else
  590. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  591. #endif
  592. }
  593. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  594. {
  595. #if JUCE_USE_VDSP_FRAMEWORK
  596. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  597. #else
  598. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  599. #endif
  600. }
  601. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  602. {
  603. #if JUCE_USE_VDSP_FRAMEWORK
  604. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  605. #else
  606. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  607. #endif
  608. }
  609. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  610. {
  611. #if JUCE_USE_VDSP_FRAMEWORK
  612. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  613. #else
  614. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  615. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  616. #endif
  617. }
  618. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  619. {
  620. #if JUCE_USE_VDSP_FRAMEWORK
  621. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  622. #else
  623. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  624. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  625. #endif
  626. }
  627. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  628. {
  629. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  630. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  631. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  632. }
  633. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  634. {
  635. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  636. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  637. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  638. }
  639. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  640. {
  641. #if JUCE_USE_VDSP_FRAMEWORK
  642. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  643. #else
  644. copyWithMultiply (dest, src, -1.0f, num);
  645. #endif
  646. }
  647. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  648. {
  649. #if JUCE_USE_VDSP_FRAMEWORK
  650. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  651. #else
  652. copyWithMultiply (dest, src, -1.0f, num);
  653. #endif
  654. }
  655. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  656. {
  657. #if JUCE_USE_VDSP_FRAMEWORK
  658. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  659. #else
  660. union {float f; uint32 i;} signMask;
  661. signMask.i = 0x7fffffffUL;
  662. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  663. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  664. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  665. #endif
  666. }
  667. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  668. {
  669. #if JUCE_USE_VDSP_FRAMEWORK
  670. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  671. #else
  672. union {double d; uint64 i;} signMask;
  673. signMask.i = 0x7fffffffffffffffULL;
  674. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  675. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  676. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  677. #endif
  678. }
  679. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  680. {
  681. #if JUCE_USE_ARM_NEON
  682. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  683. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  684. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  685. #else
  686. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  687. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  688. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  689. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  690. #endif
  691. }
  692. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  693. {
  694. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  695. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  696. const Mode::ParallelType cmp = Mode::load1 (comp);)
  697. }
  698. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  699. {
  700. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  701. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  702. const Mode::ParallelType cmp = Mode::load1 (comp);)
  703. }
  704. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  705. {
  706. #if JUCE_USE_VDSP_FRAMEWORK
  707. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  708. #else
  709. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  710. #endif
  711. }
  712. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  713. {
  714. #if JUCE_USE_VDSP_FRAMEWORK
  715. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  716. #else
  717. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  718. #endif
  719. }
  720. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  721. {
  722. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  723. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  724. const Mode::ParallelType cmp = Mode::load1 (comp);)
  725. }
  726. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  727. {
  728. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  729. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  730. const Mode::ParallelType cmp = Mode::load1 (comp);)
  731. }
  732. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  733. {
  734. #if JUCE_USE_VDSP_FRAMEWORK
  735. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  736. #else
  737. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  738. #endif
  739. }
  740. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  741. {
  742. #if JUCE_USE_VDSP_FRAMEWORK
  743. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  744. #else
  745. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  746. #endif
  747. }
  748. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  749. {
  750. jassert(high >= low);
  751. #if JUCE_USE_VDSP_FRAMEWORK
  752. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  753. #else
  754. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  755. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  756. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  757. #endif
  758. }
  759. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  760. {
  761. jassert(high >= low);
  762. #if JUCE_USE_VDSP_FRAMEWORK
  763. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  764. #else
  765. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  766. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  767. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  768. #endif
  769. }
  770. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  771. {
  772. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  773. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  774. #else
  775. return Range<float>::findMinAndMax (src, num);
  776. #endif
  777. }
  778. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  779. {
  780. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  781. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  782. #else
  783. return Range<double>::findMinAndMax (src, num);
  784. #endif
  785. }
  786. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  787. {
  788. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  789. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  790. #else
  791. return juce::findMinimum (src, num);
  792. #endif
  793. }
  794. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  795. {
  796. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  797. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  798. #else
  799. return juce::findMinimum (src, num);
  800. #endif
  801. }
  802. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  803. {
  804. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  805. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  806. #else
  807. return juce::findMaximum (src, num);
  808. #endif
  809. }
  810. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  811. {
  812. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  813. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  814. #else
  815. return juce::findMaximum (src, num);
  816. #endif
  817. }
  818. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  819. {
  820. #if JUCE_USE_SSE_INTRINSICS
  821. if (FloatVectorHelpers::isSSE2Available())
  822. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  823. #endif
  824. (void) shouldEnable;
  825. }
  826. //==============================================================================
  827. //==============================================================================
  #if JUCE_UNIT_TESTS

  /** Unit test that runs the FloatVectorOperations routines on float and double
      buffers and compares the results with plain scalar reference values.
  */
  class FloatVectorOperationsTests  : public UnitTest
  {
  public:
      FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}

      /** The test logic for one sample type; runTest() below instantiates it
          for float and for double.
      */
      template <typename ValueType>
      struct TestRunner
      {
          /** Performs one pass of every check on freshly generated data.

              The generator is taken by reference so that each pass advances it,
              and the following pass works with different sizes, offsets and
              values. Failures are reported through u.
          */
          static void runTest (UnitTest& u, Random& random)
          {
              // Pick either a longer or a very short buffer, always at least one element.
              const int range = random.nextBool() ? 500 : 10;
              const int num = random.nextInt (range) + 1;

              // The 16 spare elements leave room for the byte offsets applied below.
              HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
              HeapBlock<int> buffer3 ((size_t) num + 16);

             #if JUCE_ARM
              ValueType* const data1 = buffer1;
              ValueType* const data2 = buffer2;
              int* const int1 = buffer3;
             #else
              // Shift each pointer by a random number of bytes - presumably so that
              // unaligned buffers get exercised as well as aligned ones.
              ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
              ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
              int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
             #endif

              fillRandomly (random, data1, num);
              fillRandomly (random, data2, num);

              // Min/max searches are compared with the scalar reference implementations.
              Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
              Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
              u.expect (minMax1 == minMax2);

              u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
              u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));

              u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
              u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));

              // From here on the buffers hold a single known value, so every
              // operation has one exact expected result for all elements.
              FloatVectorOperations::clear (data1, num);
              u.expect (areAllValuesEqual (data1, num, 0));

              FloatVectorOperations::fill (data1, (ValueType) 2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 2));

              FloatVectorOperations::add (data1, (ValueType) 2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 4));

              FloatVectorOperations::copy (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 4));

              FloatVectorOperations::add (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 8));

              FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 16));

              FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 32));

              FloatVectorOperations::multiply (data1, (ValueType) 2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 8));

              FloatVectorOperations::multiply (data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 256));

              FloatVectorOperations::negate (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) -256));

              FloatVectorOperations::subtract (data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 512));

              FloatVectorOperations::abs (data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 256));

              FloatVectorOperations::abs (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 256));

              fillRandomly (random, int1, num);
              doConversionTest (u, data1, data2, int1, num);

              // Three-buffer addWithMultiply with dest also used as a source: 2 + 2 * 3 = 8.
              FloatVectorOperations::fill (data1, (ValueType) 2, num);
              FloatVectorOperations::fill (data2, (ValueType) 3, num);
              FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
          }

          /** Compares convertFixedToFloat with the scalar convertFixed reference. */
          static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
          {
              FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
              convertFixed (data2, int1, 2.0f, num);
              u.expect (buffersMatch (data1, data2, num));
          }

          /** The conversion check is only run for float buffers, so this overload does nothing. */
          static void doConversionTest (UnitTest&, double*, double*, int*, int) {}

          /** Fills num elements with values derived from random.nextDouble() * 1000. */
          static void fillRandomly (Random& random, ValueType* d, int num)
          {
              while (--num >= 0)
                  *d++ = (ValueType) (random.nextDouble() * 1000.0);
          }

          /** Fills num elements with values from random.nextInt(). */
          static void fillRandomly (Random& random, int* d, int num)
          {
              while (--num >= 0)
                  *d++ = random.nextInt();
          }

          /** Scalar reference for convertFixedToFloat. */
          static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
          {
              while (--num >= 0)
                  *d++ = *s++ * multiplier;
          }

          /** Returns true if all num elements compare exactly equal to target. */
          static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
          {
              while (--num >= 0)
                  if (*d++ != target)
                      return false;

              return true;
          }

          /** Returns true if every pair of elements passes valuesMatch(). */
          static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
          {
              while (--num >= 0)
                  if (! valuesMatch (*d1++, *d2++))
                      return false;

              return true;
          }

          /** Returns true if the two values differ by less than the type's epsilon. */
          static bool valuesMatch (ValueType v1, ValueType v2)
          {
              return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
          }
      };

      void runTest()
      {
          beginTest ("FloatVectorOperations");

          // Fetch the generator once and share it across all passes. Passing a
          // fresh getRandom() result into every pass (as a by-value copy) would
          // restart each pass from the same generator state, so all passes
          // would repeat the same sizes, offsets and data.
          Random random (getRandom());

          for (int i = 1000; --i >= 0;)
          {
              TestRunner<float>::runTest (*this, random);
              TestRunner<double>::runTest (*this, random);
          }
      }
  };

  static FloatVectorOperationsTests vectorOpTests;

  #endif