The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1166 lines
52KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2015 - ROLI Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance steps used as the `increment` argument of the vector loops.
  // Each named pointer moves forward by (16 / sizeof (*dest)) elements; note that
  // the element size is always taken from *dest, also for the source pointers.
  #define JUCE_INCREMENT_DEST             dest += (16 / sizeof (*dest));
  #define JUCE_INCREMENT_SRC_DEST         JUCE_INCREMENT_DEST src += (16 / sizeof (*dest));
  #define JUCE_INCREMENT_SRC1_SRC2_DEST   JUCE_INCREMENT_DEST src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. inline static bool isAligned (const void* p) noexcept
  24. {
  25. return (((pointer_sized_int) p) & 15) == 0;
  26. }
  27. struct BasicOps32
  28. {
  29. typedef float Type;
  30. typedef __m128 ParallelType;
  31. typedef __m128 IntegerType;
  32. enum { numParallel = 4 };
  33. // Integer and parallel types are the same for SSE. On neon they have different types
  34. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  35. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  36. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  37. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  38. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  39. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  40. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  41. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  42. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  43. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  44. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  45. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  46. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  47. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  48. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  49. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  50. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  51. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  52. };
  53. struct BasicOps64
  54. {
  55. typedef double Type;
  56. typedef __m128d ParallelType;
  57. typedef __m128d IntegerType;
  58. enum { numParallel = 2 };
  59. // Integer and parallel types are the same for SSE. On neon they have different types
  60. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  61. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  62. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  63. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  64. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  65. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  66. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  67. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  68. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  69. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  70. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  71. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  72. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  73. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  74. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  75. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  76. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  77. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  78. };
  // JUCE_BEGIN_VEC_OP and JUCE_FINISH_VEC_OP bracket the vectorised part of an
  // operation and must be used as a pair: BEGIN opens a brace that FINISH closes.
  //
  // BEGIN picks the Mode from the element size of *dest and computes numLongOps,
  // the number of whole vectors contained in num.
  // FINISH reduces num to the count of leftover elements, returns from the enclosing
  // function if there are none, and otherwise runs scalarOp once per leftover element.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      { \
          const int numLongOps = num / Mode::numParallel;

  #define JUCE_FINISH_VEC_OP(scalarOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  // In-place operation on dest only. Chooses the aligned load/store pair when dest is
  // 16-byte aligned and the unaligned pair otherwise; scalarOp handles the leftovers.
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorExpr, declareLocals, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
          JUCE_VEC_LOOP (vectorExpr, dummy, Mode::loadA, Mode::storeA, declareLocals, JUCE_INCREMENT_DEST) \
      else \
          JUCE_VEC_LOOP (vectorExpr, dummy, Mode::loadU, Mode::storeU, declareLocals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (scalarOp)
  // One source, one destination. The alignment of dest selects the dest load/store
  // flavour and the alignment of src selects the src load flavour, giving four loop
  // variants; scalarOp handles the leftovers.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vectorExpr, Mode::loadA, Mode::loadA, Mode::storeA, declareLocals, advance) \
          else \
              JUCE_VEC_LOOP (vectorExpr, Mode::loadU, Mode::loadA, Mode::storeA, declareLocals, advance) \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vectorExpr, Mode::loadA, Mode::loadU, Mode::storeU, declareLocals, advance) \
          else \
              JUCE_VEC_LOOP (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, declareLocals, advance) \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  // Two sources, one write-only destination. The alignment of dest, src1 and src2 is
  // tested independently, selecting one of eight loop variants (aligned or unaligned
  // store, and aligned or unaligned load for each source); scalarOp handles the leftovers.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadA, Mode::storeA, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadU, Mode::storeA, declareLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadA, Mode::storeA, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeA, declareLocals, advance) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadA, Mode::storeU, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadU, Mode::storeU, declareLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadA, Mode::storeU, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, declareLocals, advance) \
          } \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  // Two sources plus a destination that is read as well as written. Same eight-way
  // alignment dispatch as the write-only variant, with the dest load flavour following
  // the dest store flavour; scalarOp handles the leftovers.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, declareLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, declareLocals, advance) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, declareLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, declareLocals, advance) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, declareLocals, advance) \
          } \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  168. //==============================================================================
  169. #elif JUCE_USE_ARM_NEON
  170. struct BasicOps32
  171. {
  172. typedef float Type;
  173. typedef float32x4_t ParallelType;
  174. typedef uint32x4_t IntegerType;
  175. enum { numParallel = 4 };
  176. static forcedinline IntegerType toint (ParallelType v) noexcept { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  177. static forcedinline ParallelType toflt (IntegerType v) noexcept { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  178. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  179. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  180. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  181. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  182. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  183. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  184. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  185. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  186. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  187. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  188. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  189. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  190. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  191. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  192. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  193. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  194. };
  195. struct BasicOps64
  196. {
  197. typedef double Type;
  198. typedef double ParallelType;
  199. typedef uint64 IntegerType;
  200. enum { numParallel = 1 };
  201. static forcedinline IntegerType toint (ParallelType v) noexcept { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  202. static forcedinline ParallelType toflt (IntegerType v) noexcept { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  203. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  204. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  205. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  206. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  207. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  208. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  209. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  210. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  211. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  212. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  213. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  214. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  215. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  216. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  217. static forcedinline Type max (ParallelType a) noexcept { return a; }
  218. static forcedinline Type min (ParallelType a) noexcept { return a; }
  219. };
  // NEON flavour of the BEGIN/FINISH pair; the two must be used together because
  // BEGIN opens a brace that FINISH closes.
  //
  // The vector section is entered only when the selected Mode has more than one lane;
  // otherwise control falls straight through to the scalar loop over all num elements.
  // Inside the section, FINISH reduces num to the leftover count and returns from the
  // enclosing function when nothing is left.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (Mode::numParallel > 1) \
      { \
          const int numLongOps = num / Mode::numParallel;

  #define JUCE_FINISH_VEC_OP(scalarOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  // NEON versions of the four JUCE_PERFORM_* entry points. No alignment dispatch is
  // done here: every variant uses the loadU/storeU flavours.

  // In-place operation on dest only.
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorExpr, declareLocals, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP (vectorExpr, dummy, Mode::loadU, Mode::storeU, declareLocals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (scalarOp)

  // One source, one destination.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, declareLocals, advance) \
      JUCE_FINISH_VEC_OP (scalarOp)

  // Two sources, write-only destination.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, declareLocals, advance) \
      JUCE_FINISH_VEC_OP (scalarOp)

  // Two sources, destination that is read as well as written.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, declareLocals, advance) \
      JUCE_FINISH_VEC_OP (scalarOp)
  250. //==============================================================================
  251. #else
  // Fallback used when neither SSE nor NEON is enabled: every JUCE_PERFORM_* entry
  // point ignores its vector-related arguments and runs scalarOp for i in [0, num).
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorExpr, declareLocals, setup) \
      for (int i = 0; i < num; ++i) scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      for (int i = 0; i < num; ++i) scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      for (int i = 0; i < num; ++i) scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorExpr, declareLocals, advance, setup) \
      for (int i = 0; i < num; ++i) scalarOp;
  260. #endif
  261. //==============================================================================
  // Core vector loops. Each runs numLongOps iterations; one iteration declares its
  // locals by invoking declareLocals with the given load functions, stores vectorExpr
  // to dest through storeDst, then runs the pointer-advance step.
  // The expansions deliberately end without a semicolon so they can sit directly
  // between `if` and `else`.

  // One (or no) source plus dest.
  #define JUCE_VEC_LOOP(vectorExpr, loadSrc, loadDst, storeDst, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc, loadDst); \
          storeDst (dest, vectorExpr); \
          advance; \
      }

  // Two sources, dest is only written.
  #define JUCE_VEC_LOOP_TWO_SOURCES(vectorExpr, loadSrc1, loadSrc2, storeDst, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc1, loadSrc2); \
          storeDst (dest, vectorExpr); \
          advance; \
      }

  // Two sources, dest is loaded as well as written.
  #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vectorExpr, loadSrc1, loadSrc2, loadDst, storeDst, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc1, loadSrc2, loadDst); \
          storeDst (dest, vectorExpr); \
          advance; \
      }
  // "Locals" helpers passed to the vector loops. Each declares the per-iteration
  // registers that the vector expression may refer to:
  //   d      - current vector loaded from dest
  //   s      - current vector loaded from src
  //   s1, s2 - current vectors loaded from src1 and src2
  #define JUCE_LOAD_NONE(loadSrc, loadDst)
  #define JUCE_LOAD_DEST(loadSrc, loadDst)                     const Mode::ParallelType d = loadDst (dest);
  #define JUCE_LOAD_SRC(loadSrc, loadDst)                      const Mode::ParallelType s = loadSrc (src);
  #define JUCE_LOAD_SRC1_SRC2(loadSrc1, loadSrc2)              const Mode::ParallelType s1 = loadSrc1 (src1), s2 = loadSrc2 (src2);
  #define JUCE_LOAD_SRC1_SRC2_DEST(loadSrc1, loadSrc2, loadDst) const Mode::ParallelType d = loadDst (dest), s1 = loadSrc1 (src1), s2 = loadSrc2 (src2);
  #define JUCE_LOAD_SRC_DEST(loadSrc, loadDst)                 const Mode::ParallelType d = loadDst (dest), s = loadSrc (src);
  289. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  290. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  291. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  292. template <typename Mode>
  293. struct MinMax
  294. {
  295. typedef typename Mode::Type Type;
  296. typedef typename Mode::ParallelType ParallelType;
  297. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  298. {
  299. int numLongOps = num / Mode::numParallel;
  300. if (numLongOps > 1)
  301. {
  302. ParallelType val;
  303. #if ! JUCE_USE_ARM_NEON
  304. if (isAligned (src))
  305. {
  306. val = Mode::loadA (src);
  307. if (isMinimum)
  308. {
  309. while (--numLongOps > 0)
  310. {
  311. src += Mode::numParallel;
  312. val = Mode::min (val, Mode::loadA (src));
  313. }
  314. }
  315. else
  316. {
  317. while (--numLongOps > 0)
  318. {
  319. src += Mode::numParallel;
  320. val = Mode::max (val, Mode::loadA (src));
  321. }
  322. }
  323. }
  324. else
  325. #endif
  326. {
  327. val = Mode::loadU (src);
  328. if (isMinimum)
  329. {
  330. while (--numLongOps > 0)
  331. {
  332. src += Mode::numParallel;
  333. val = Mode::min (val, Mode::loadU (src));
  334. }
  335. }
  336. else
  337. {
  338. while (--numLongOps > 0)
  339. {
  340. src += Mode::numParallel;
  341. val = Mode::max (val, Mode::loadU (src));
  342. }
  343. }
  344. }
  345. Type result = isMinimum ? Mode::min (val)
  346. : Mode::max (val);
  347. num &= (Mode::numParallel - 1);
  348. src += Mode::numParallel;
  349. for (int i = 0; i < num; ++i)
  350. result = isMinimum ? jmin (result, src[i])
  351. : jmax (result, src[i]);
  352. return result;
  353. }
  354. return isMinimum ? juce::findMinimum (src, num)
  355. : juce::findMaximum (src, num);
  356. }
  357. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  358. {
  359. int numLongOps = num / Mode::numParallel;
  360. if (numLongOps > 1)
  361. {
  362. ParallelType mn, mx;
  363. #if ! JUCE_USE_ARM_NEON
  364. if (isAligned (src))
  365. {
  366. mn = Mode::loadA (src);
  367. mx = mn;
  368. while (--numLongOps > 0)
  369. {
  370. src += Mode::numParallel;
  371. const ParallelType v = Mode::loadA (src);
  372. mn = Mode::min (mn, v);
  373. mx = Mode::max (mx, v);
  374. }
  375. }
  376. else
  377. #endif
  378. {
  379. mn = Mode::loadU (src);
  380. mx = mn;
  381. while (--numLongOps > 0)
  382. {
  383. src += Mode::numParallel;
  384. const ParallelType v = Mode::loadU (src);
  385. mn = Mode::min (mn, v);
  386. mx = Mode::max (mx, v);
  387. }
  388. }
  389. Range<Type> result (Mode::min (mn),
  390. Mode::max (mx));
  391. num &= (Mode::numParallel - 1);
  392. src += Mode::numParallel;
  393. for (int i = 0; i < num; ++i)
  394. result = result.getUnionWith (src[i]);
  395. return result;
  396. }
  397. return Range<Type>::findMinAndMax (src, num);
  398. }
  399. };
  400. #endif
  401. }
  402. //==============================================================================
  namespace
  {
  #if JUCE_USE_VDSP_FRAMEWORK
      // Strips constness from a pointer so it can be handed to vDSP functions whose
      // signatures differ slightly in the OSX 10.8 SDK and below. Can be safely
      // removed once those SDKs are obsolete.
      template <typename ElementType>
      ElementType* osx108sdkCompatibilityCast (const ElementType* pointer) noexcept
      {
          return const_cast<ElementType*> (pointer);
      }
  #endif
  }
  412. //==============================================================================
  413. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  414. {
  415. #if JUCE_USE_VDSP_FRAMEWORK
  416. vDSP_vclr (dest, 1, (size_t) num);
  417. #else
  418. zeromem (dest, (size_t) num * sizeof (float));
  419. #endif
  420. }
  421. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  422. {
  423. #if JUCE_USE_VDSP_FRAMEWORK
  424. vDSP_vclrD (dest, 1, (size_t) num);
  425. #else
  426. zeromem (dest, (size_t) num * sizeof (double));
  427. #endif
  428. }
  429. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  430. {
  431. #if JUCE_USE_VDSP_FRAMEWORK
  432. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  433. #else
  434. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  435. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  436. #endif
  437. }
  438. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  439. {
  440. #if JUCE_USE_VDSP_FRAMEWORK
  441. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  442. #else
  443. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  444. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  445. #endif
  446. }
  447. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  448. {
  449. memcpy (dest, src, (size_t) num * sizeof (float));
  450. }
  451. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  452. {
  453. memcpy (dest, src, (size_t) num * sizeof (double));
  454. }
  455. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  456. {
  457. #if JUCE_USE_VDSP_FRAMEWORK
  458. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  459. #else
  460. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  461. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  462. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  463. #endif
  464. }
  465. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  466. {
  467. #if JUCE_USE_VDSP_FRAMEWORK
  468. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  469. #else
  470. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  471. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  472. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  473. #endif
  474. }
  475. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  476. {
  477. #if JUCE_USE_VDSP_FRAMEWORK
  478. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  479. #else
  480. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  481. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  482. #endif
  483. }
  484. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  485. {
  486. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  487. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  488. }
  489. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
  490. {
  491. #if JUCE_USE_VDSP_FRAMEWORK
  492. vDSP_vsadd (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  493. #else
  494. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  495. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  496. const Mode::ParallelType am = Mode::load1 (amount);)
  497. #endif
  498. }
  499. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
  500. {
  501. #if JUCE_USE_VDSP_FRAMEWORK
  502. vDSP_vsaddD (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  503. #else
  504. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  505. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  506. const Mode::ParallelType am = Mode::load1 (amount);)
  507. #endif
  508. }
  509. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  510. {
  511. #if JUCE_USE_VDSP_FRAMEWORK
  512. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  513. #else
  514. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  515. #endif
  516. }
  517. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  518. {
  519. #if JUCE_USE_VDSP_FRAMEWORK
  520. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  521. #else
  522. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  523. #endif
  524. }
  525. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  526. {
  527. #if JUCE_USE_VDSP_FRAMEWORK
  528. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  529. #else
  530. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  531. #endif
  532. }
  533. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  534. {
  535. #if JUCE_USE_VDSP_FRAMEWORK
  536. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  537. #else
  538. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  539. #endif
  540. }
  541. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  542. {
  543. #if JUCE_USE_VDSP_FRAMEWORK
  544. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  545. #else
  546. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  547. #endif
  548. }
  549. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  550. {
  551. #if JUCE_USE_VDSP_FRAMEWORK
  552. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  553. #else
  554. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  555. #endif
  556. }
  557. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  558. {
  559. #if JUCE_USE_VDSP_FRAMEWORK
  560. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  561. #else
  562. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  563. #endif
  564. }
  565. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  566. {
  567. #if JUCE_USE_VDSP_FRAMEWORK
  568. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  569. #else
  570. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  571. #endif
  572. }
  573. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  574. {
  575. #if JUCE_USE_VDSP_FRAMEWORK
  576. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  577. #else
  578. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  579. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  580. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  581. #endif
  582. }
  583. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  584. {
  585. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  586. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  587. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  588. }
  589. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  590. {
  591. #if JUCE_USE_VDSP_FRAMEWORK
  592. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  593. #else
  594. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  595. JUCE_LOAD_SRC1_SRC2_DEST,
  596. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  597. #endif
  598. }
  599. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  600. {
  601. #if JUCE_USE_VDSP_FRAMEWORK
  602. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  603. #else
  604. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  605. JUCE_LOAD_SRC1_SRC2_DEST,
  606. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  607. #endif
  608. }
  609. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  610. {
  611. #if JUCE_USE_VDSP_FRAMEWORK
  612. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  613. #else
  614. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  615. #endif
  616. }
  617. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  618. {
  619. #if JUCE_USE_VDSP_FRAMEWORK
  620. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  621. #else
  622. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  623. #endif
  624. }
  625. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  626. {
  627. #if JUCE_USE_VDSP_FRAMEWORK
  628. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  629. #else
  630. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  631. #endif
  632. }
  633. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  634. {
  635. #if JUCE_USE_VDSP_FRAMEWORK
  636. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  637. #else
  638. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  639. #endif
  640. }
  641. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  642. {
  643. #if JUCE_USE_VDSP_FRAMEWORK
  644. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  645. #else
  646. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  647. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  648. #endif
  649. }
  650. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  651. {
  652. #if JUCE_USE_VDSP_FRAMEWORK
  653. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  654. #else
  655. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  656. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  657. #endif
  658. }
  659. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  660. {
  661. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  662. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  663. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  664. }
  665. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  666. {
  667. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  668. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  669. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  670. }
  671. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  672. {
  673. #if JUCE_USE_VDSP_FRAMEWORK
  674. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  675. #else
  676. copyWithMultiply (dest, src, -1.0f, num);
  677. #endif
  678. }
  679. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  680. {
  681. #if JUCE_USE_VDSP_FRAMEWORK
  682. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  683. #else
  684. copyWithMultiply (dest, src, -1.0f, num);
  685. #endif
  686. }
  687. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  688. {
  689. #if JUCE_USE_VDSP_FRAMEWORK
  690. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  691. #else
  692. union { float f; uint32 i; } signMask;
  693. signMask.i = 0x7fffffffUL;
  694. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  695. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  696. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  697. ignoreUnused (signMask);
  698. #endif
  699. }
  700. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  701. {
  702. #if JUCE_USE_VDSP_FRAMEWORK
  703. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  704. #else
  705. union {double d; uint64 i;} signMask;
  706. signMask.i = 0x7fffffffffffffffULL;
  707. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  708. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  709. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  710. ignoreUnused (signMask);
  711. #endif
  712. }
  713. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  714. {
  715. #if JUCE_USE_ARM_NEON
  716. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  717. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  718. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  719. #else
  720. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  721. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  722. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  723. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  724. #endif
  725. }
  726. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  727. {
  728. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  729. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  730. const Mode::ParallelType cmp = Mode::load1 (comp);)
  731. }
  732. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  733. {
  734. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  735. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  736. const Mode::ParallelType cmp = Mode::load1 (comp);)
  737. }
  738. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  739. {
  740. #if JUCE_USE_VDSP_FRAMEWORK
  741. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  742. #else
  743. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  744. #endif
  745. }
  746. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  747. {
  748. #if JUCE_USE_VDSP_FRAMEWORK
  749. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  750. #else
  751. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  752. #endif
  753. }
  754. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  755. {
  756. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  757. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  758. const Mode::ParallelType cmp = Mode::load1 (comp);)
  759. }
  760. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  761. {
  762. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  763. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  764. const Mode::ParallelType cmp = Mode::load1 (comp);)
  765. }
  766. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  767. {
  768. #if JUCE_USE_VDSP_FRAMEWORK
  769. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  770. #else
  771. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  772. #endif
  773. }
  774. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  775. {
  776. #if JUCE_USE_VDSP_FRAMEWORK
  777. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  778. #else
  779. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  780. #endif
  781. }
  782. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  783. {
  784. jassert(high >= low);
  785. #if JUCE_USE_VDSP_FRAMEWORK
  786. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  787. #else
  788. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  789. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  790. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  791. #endif
  792. }
  793. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  794. {
  795. jassert(high >= low);
  796. #if JUCE_USE_VDSP_FRAMEWORK
  797. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  798. #else
  799. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  800. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  801. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  802. #endif
  803. }
  804. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  805. {
  806. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  807. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  808. #else
  809. return Range<float>::findMinAndMax (src, num);
  810. #endif
  811. }
  812. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  813. {
  814. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  815. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  816. #else
  817. return Range<double>::findMinAndMax (src, num);
  818. #endif
  819. }
  820. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  821. {
  822. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  823. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  824. #else
  825. return juce::findMinimum (src, num);
  826. #endif
  827. }
  828. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  829. {
  830. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  831. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  832. #else
  833. return juce::findMinimum (src, num);
  834. #endif
  835. }
  836. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  837. {
  838. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  839. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  840. #else
  841. return juce::findMaximum (src, num);
  842. #endif
  843. }
  844. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  845. {
  846. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  847. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  848. #else
  849. return juce::findMaximum (src, num);
  850. #endif
  851. }
  852. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  853. {
  854. #if JUCE_USE_SSE_INTRINSICS
  855. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  856. #endif
  857. ignoreUnused (shouldEnable);
  858. }
  859. void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept
  860. {
  861. #if JUCE_USE_SSE_INTRINSICS
  862. const unsigned int mxcsr = _mm_getcsr();
  863. _mm_setcsr (mxcsr | 0x8040); // add the DAZ and FZ bits
  864. #endif
  865. }
  866. //==============================================================================
  867. //==============================================================================
  868. #if JUCE_UNIT_TESTS
  869. class FloatVectorOperationsTests : public UnitTest
  870. {
  871. public:
  872. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  873. template <typename ValueType>
  874. struct TestRunner
  875. {
  876. static void runTest (UnitTest& u, Random random)
  877. {
  878. const int range = random.nextBool() ? 500 : 10;
  879. const int num = random.nextInt (range) + 1;
  880. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  881. HeapBlock<int> buffer3 ((size_t) num + 16);
  882. #if JUCE_ARM
  883. ValueType* const data1 = buffer1;
  884. ValueType* const data2 = buffer2;
  885. int* const int1 = buffer3;
  886. #else
  887. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  888. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  889. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  890. #endif
  891. fillRandomly (random, data1, num);
  892. fillRandomly (random, data2, num);
  893. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  894. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  895. u.expect (minMax1 == minMax2);
  896. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  897. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  898. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  899. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  900. FloatVectorOperations::clear (data1, num);
  901. u.expect (areAllValuesEqual (data1, num, 0));
  902. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  903. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  904. FloatVectorOperations::add (data1, (ValueType) 2, num);
  905. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  906. FloatVectorOperations::copy (data2, data1, num);
  907. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  908. FloatVectorOperations::add (data2, data1, num);
  909. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  910. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  911. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  912. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  913. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  914. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  915. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  916. FloatVectorOperations::multiply (data1, data2, num);
  917. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  918. FloatVectorOperations::negate (data2, data1, num);
  919. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  920. FloatVectorOperations::subtract (data1, data2, num);
  921. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  922. FloatVectorOperations::abs (data1, data2, num);
  923. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  924. FloatVectorOperations::abs (data2, data1, num);
  925. u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
  926. fillRandomly (random, int1, num);
  927. doConversionTest (u, data1, data2, int1, num);
  928. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  929. FloatVectorOperations::fill (data2, (ValueType) 3, num);
  930. FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
  931. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  932. }
  933. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  934. {
  935. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  936. convertFixed (data2, int1, 2.0f, num);
  937. u.expect (buffersMatch (data1, data2, num));
  938. }
  939. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  940. static void fillRandomly (Random& random, ValueType* d, int num)
  941. {
  942. while (--num >= 0)
  943. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  944. }
  945. static void fillRandomly (Random& random, int* d, int num)
  946. {
  947. while (--num >= 0)
  948. *d++ = random.nextInt();
  949. }
  950. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  951. {
  952. while (--num >= 0)
  953. *d++ = *s++ * multiplier;
  954. }
  955. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  956. {
  957. while (--num >= 0)
  958. if (*d++ != target)
  959. return false;
  960. return true;
  961. }
  962. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  963. {
  964. while (--num >= 0)
  965. if (! valuesMatch (*d1++, *d2++))
  966. return false;
  967. return true;
  968. }
  969. static bool valuesMatch (ValueType v1, ValueType v2)
  970. {
  971. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  972. }
  973. };
  974. void runTest() override
  975. {
  976. beginTest ("FloatVectorOperations");
  977. for (int i = 1000; --i >= 0;)
  978. {
  979. TestRunner<float>::runTest (*this, getRandom());
  980. TestRunner<double>::runTest (*this, getRandom());
  981. }
  982. }
  983. };
  984. static FloatVectorOperationsTests vectorOpTests;
  985. #endif