The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1717 lines
77KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2022 - Raw Material Software Limited
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. The code included in this file is provided under the terms of the ISC license
  8. http://www.isc.org/downloads/software-support-policy/isc-license. Permission
  9. To use, copy, modify, and/or distribute this software for any purpose with or
  10. without fee is hereby granted provided that the above copyright notice and
  11. this permission notice appear in all copies.
  12. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  13. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  14. DISCLAIMED.
  15. ==============================================================================
  16. */
  17. namespace juce
  18. {
  19. namespace FloatVectorHelpers
  20. {
  // Pointer-advance snippets pasted into the vector loops below.
  // Each pointer moves on by (16 / sizeof (*dest)) elements, i.e. 16 bytes' worth of
  // dest's element type. The source pointers use the same element count, which is
  // computed from dest's element type rather than their own.
  // Each snippet ends with its own ';', so it expands to complete statements.
  21. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  22. #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  23. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  24. #if JUCE_USE_SSE_INTRINSICS
  25. static bool isAligned (const void* p) noexcept
  26. {
  27. return (((pointer_sized_int) p) & 15) == 0;
  28. }
  29. struct BasicOps32
  30. {
  31. using Type = float;
  32. using ParallelType = __m128;
  33. using IntegerType = __m128;
  34. enum { numParallel = 4 };
  35. // Integer and parallel types are the same for SSE. On neon they have different types
  36. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  37. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  38. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  39. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  40. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  41. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  42. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  43. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  44. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  45. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  46. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  47. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  48. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  49. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  50. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  51. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  52. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  53. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  54. };
  55. struct BasicOps64
  56. {
  57. using Type = double;
  58. using ParallelType = __m128d;
  59. using IntegerType = __m128d;
  60. enum { numParallel = 2 };
  61. // Integer and parallel types are the same for SSE. On neon they have different types
  62. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  63. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  64. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  65. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  66. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  67. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  68. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  69. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  70. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  71. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  72. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  73. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  74. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  75. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  76. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  77. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  78. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  79. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  80. };
  // Opening half of every vectorised operation (SSE path).
  // Expects `dest` and `num` to be in scope at the expansion site. It selects the
  // operation set (Mode) from the size of dest's element type, opens a brace that
  // JUCE_FINISH_VEC_OP closes, and computes how many whole vectors fit into num.
  81. #define JUCE_BEGIN_VEC_OP \
  82. using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
  83. { \
  84. const auto numLongOps = num / Mode::numParallel;
  // Closing half: reduces num to the elements left over after the whole vectors,
  // returns from the enclosing function if none remain (so it is only usable in
  // functions returning void), closes the brace opened by JUCE_BEGIN_VEC_OP, then
  // runs the scalar statement normalOp once per leftover element with index i.
  85. #define JUCE_FINISH_VEC_OP(normalOp) \
  86. num &= (Mode::numParallel - 1); \
  87. if (num == 0) return; \
  88. } \
  89. for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
  // Vectorised operation that only touches `dest` (SSE path).
  //   normalOp - scalar statement used for the leftover elements (indexed by i)
  //   vecOp    - expression producing the vector to store
  //   locals   - one of the JUCE_LOAD_* macros, declaring the loop's local vectors
  //   setupOp  - statements run once before the loop (e.g. broadcasting a scalar)
  // The aligned or unaligned load/store pair is chosen from dest's alignment.
  // `dummy` fills the unused source-loader slot; the locals macros this file pairs
  // with it (JUCE_LOAD_NONE, JUCE_LOAD_DEST) never expand that argument.
  90. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  91. JUCE_BEGIN_VEC_OP \
  92. setupOp \
  93. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  94. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  95. JUCE_FINISH_VEC_OP (normalOp)
  // Vectorised operation reading `src` and writing `dest` (SSE path).
  // Parameters are as for JUCE_PERFORM_VEC_OP_DEST, plus `increment`, the snippet
  // that advances the pointers after each vector.
  // The four branches cover every combination of dest and src alignment.
  // JUCE_VEC_LOOP takes (srcLoad, dstLoad, dstStore) in that order, so the first
  // loader follows src's alignment and the second loader and the store follow dest's.
  96. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  97. JUCE_BEGIN_VEC_OP \
  98. setupOp \
  99. if (FloatVectorHelpers::isAligned (dest)) \
  100. { \
  101. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  102. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  103. }\
  104. else \
  105. { \
  106. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  107. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  108. } \
  109. JUCE_FINISH_VEC_OP (normalOp)
  // Vectorised operation reading `src1` and `src2` and writing `dest`, without
  // reading dest first (SSE path).
  // The eight branches cover every combination of dest, src1 and src2 alignment.
  // JUCE_VEC_LOOP_TWO_SOURCES takes (src1Load, src2Load, dstStore) in that order,
  // so each loader follows its own source's alignment and the store follows dest's.
  110. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  111. JUCE_BEGIN_VEC_OP \
  112. setupOp \
  113. if (FloatVectorHelpers::isAligned (dest)) \
  114. { \
  115. if (FloatVectorHelpers::isAligned (src1)) \
  116. { \
  117. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  118. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
  119. } \
  120. else \
  121. { \
  122. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  123. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
  124. } \
  125. } \
  126. else \
  127. { \
  128. if (FloatVectorHelpers::isAligned (src1)) \
  129. { \
  130. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
  131. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  132. } \
  133. else \
  134. { \
  135. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
  136. else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  137. } \
  138. } \
  139. JUCE_FINISH_VEC_OP (normalOp)
  // Vectorised operation reading `src1`, `src2` AND the current contents of `dest`,
  // then writing `dest` (SSE path), for accumulate-style operations.
  // JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD takes
  // (src1Load, src2Load, dstLoad, dstStore) in that order. The eight branches cover
  // every alignment combination: the dest loader and the store both follow dest's
  // alignment, and each source loader follows its own pointer.
  140. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  141. JUCE_BEGIN_VEC_OP \
  142. setupOp \
  143. if (FloatVectorHelpers::isAligned (dest)) \
  144. { \
  145. if (FloatVectorHelpers::isAligned (src1)) \
  146. { \
  147. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  148. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  149. } \
  150. else \
  151. { \
  152. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  153. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  154. } \
  155. } \
  156. else \
  157. { \
  158. if (FloatVectorHelpers::isAligned (src1)) \
  159. { \
  160. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  161. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  162. } \
  163. else \
  164. { \
  165. if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  166. else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  167. } \
  168. } \
  169. JUCE_FINISH_VEC_OP (normalOp)
  170. //==============================================================================
  171. #elif JUCE_USE_ARM_NEON
  172. struct BasicOps32
  173. {
  174. using Type = float;
  175. using ParallelType = float32x4_t;
  176. using IntegerType = uint32x4_t;
  177. union signMaskUnion { ParallelType f; IntegerType i; };
  178. enum { numParallel = 4 };
  179. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  180. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  181. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  182. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  183. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  184. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  185. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  186. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  187. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  188. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  189. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  190. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  191. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  192. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  193. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  194. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  195. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  196. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  197. };
  198. struct BasicOps64
  199. {
  200. using Type = double;
  201. using ParallelType = double;
  202. using IntegerType = uint64;
  203. union signMaskUnion { ParallelType f; IntegerType i; };
  204. enum { numParallel = 1 };
  205. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  206. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  207. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  208. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  209. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  210. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  211. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  212. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  213. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  214. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  215. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  216. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  217. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  218. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  219. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  220. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  221. static forcedinline Type max (ParallelType a) noexcept { return a; }
  222. static forcedinline Type min (ParallelType a) noexcept { return a; }
  223. };
  // NEON versions of the operation macros.
  // JUCE_BEGIN_VEC_OP differs from the SSE one by guarding the vector section with
  // `if (Mode::numParallel > 1)`. Modes with a single lane (BasicOps64 on this path
  // declares numParallel = 1) skip straight to the scalar loop in JUCE_FINISH_VEC_OP.
  224. #define JUCE_BEGIN_VEC_OP \
  225. using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
  226. if (Mode::numParallel > 1) \
  227. { \
  228. const auto numLongOps = num / Mode::numParallel;
  // Closing half: reduces num to the leftover element count, returns early from the
  // enclosing function when nothing is left, closes the brace, then runs normalOp
  // once per remaining element with index i.
  229. #define JUCE_FINISH_VEC_OP(normalOp) \
  230. num &= (Mode::numParallel - 1); \
  231. if (num == 0) return; \
  232. } \
  233. for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
  // The four operation shapes below perform no alignment dispatch: every loop is
  // instantiated with the loadU / storeU variants only.
  234. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  235. JUCE_BEGIN_VEC_OP \
  236. setupOp \
  237. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  238. JUCE_FINISH_VEC_OP (normalOp)
  239. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  240. JUCE_BEGIN_VEC_OP \
  241. setupOp \
  242. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  243. JUCE_FINISH_VEC_OP (normalOp)
  244. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  245. JUCE_BEGIN_VEC_OP \
  246. setupOp \
  247. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  248. JUCE_FINISH_VEC_OP (normalOp)
  249. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  250. JUCE_BEGIN_VEC_OP \
  251. setupOp \
  252. JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  253. JUCE_FINISH_VEC_OP (normalOp)
  254. //==============================================================================
  255. #else
  // Scalar fallbacks used when neither the SSE nor the NEON branch above is compiled.
  // Each macro simply runs normalOp once per element, with index i going from 0 up
  // to (but not including) num. All other arguments are accepted but ignored.
  256. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  257. for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
  258. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  259. for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
  260. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  261. for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
  262. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
  263. for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
  264. #endif
  265. //==============================================================================
  // Loop bodies shared by the JUCE_PERFORM_VEC_OP_* macros.
  // Each runs numLongOps iterations (numLongOps is declared by JUCE_BEGIN_VEC_OP).
  // Per iteration: `locals` declares the input vectors using the supplied loaders,
  // the result of vecOp is stored to dest, and `increment` advances the pointers.
  266. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  267. for (auto i = (decltype (numLongOps)) 0; i < numLongOps; ++i) \
  268. { \
  269. locals (srcLoad, dstLoad); \
  270. dstStore (dest, vecOp); \
  271. increment; \
  272. }
  // As above, but `locals` receives one loader per source pointer; dest is not loaded.
  273. #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
  274. for (auto i = (decltype (numLongOps)) 0; i < numLongOps; ++i) \
  275. { \
  276. locals (src1Load, src2Load); \
  277. dstStore (dest, vecOp); \
  278. increment; \
  279. }
  // As above, but `locals` additionally receives a loader for the current dest contents.
  280. #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
  281. for (auto i = (decltype (numLongOps)) 0; i < numLongOps; ++i) \
  282. { \
  283. locals (src1Load, src2Load, dstLoad); \
  284. dstStore (dest, vecOp); \
  285. increment; \
  286. }
  // `locals` choices. Each declares the const vectors that a vecOp expression may name:
  //   d      - loaded from dest
  //   s      - loaded from src
  //   s1, s2 - loaded from src1 and src2
  // JUCE_LOAD_NONE declares nothing, for operations that only write.
  287. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  288. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  289. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  290. #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
  291. #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);
  292. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  293. union signMask32 { float f; uint32 i; };
  294. union signMask64 { double d; uint64 i; };
  295. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  296. template <int typeSize> struct ModeType { using Mode = BasicOps32; };
  297. template <> struct ModeType<8> { using Mode = BasicOps64; };
  298. template <typename Mode>
  299. struct MinMax
  300. {
  301. using Type = typename Mode::Type;
  302. using ParallelType = typename Mode::ParallelType;
  303. template <typename Size>
  304. static Type findMinOrMax (const Type* src, Size num, const bool isMinimum) noexcept
  305. {
  306. auto numLongOps = num / Mode::numParallel;
  307. if (numLongOps > 1)
  308. {
  309. ParallelType val;
  310. #if ! JUCE_USE_ARM_NEON
  311. if (isAligned (src))
  312. {
  313. val = Mode::loadA (src);
  314. if (isMinimum)
  315. {
  316. while (--numLongOps > 0)
  317. {
  318. src += Mode::numParallel;
  319. val = Mode::min (val, Mode::loadA (src));
  320. }
  321. }
  322. else
  323. {
  324. while (--numLongOps > 0)
  325. {
  326. src += Mode::numParallel;
  327. val = Mode::max (val, Mode::loadA (src));
  328. }
  329. }
  330. }
  331. else
  332. #endif
  333. {
  334. val = Mode::loadU (src);
  335. if (isMinimum)
  336. {
  337. while (--numLongOps > 0)
  338. {
  339. src += Mode::numParallel;
  340. val = Mode::min (val, Mode::loadU (src));
  341. }
  342. }
  343. else
  344. {
  345. while (--numLongOps > 0)
  346. {
  347. src += Mode::numParallel;
  348. val = Mode::max (val, Mode::loadU (src));
  349. }
  350. }
  351. }
  352. Type result = isMinimum ? Mode::min (val)
  353. : Mode::max (val);
  354. num &= (Mode::numParallel - 1);
  355. src += Mode::numParallel;
  356. for (auto i = (decltype (num)) 0; i < num; ++i)
  357. result = isMinimum ? jmin (result, src[i])
  358. : jmax (result, src[i]);
  359. return result;
  360. }
  361. if (num <= 0)
  362. return 0;
  363. return isMinimum ? *std::min_element (src, src + num)
  364. : *std::max_element (src, src + num);
  365. }
  366. template <typename Size>
  367. static Range<Type> findMinAndMax (const Type* src, Size num) noexcept
  368. {
  369. auto numLongOps = num / Mode::numParallel;
  370. if (numLongOps > 1)
  371. {
  372. ParallelType mn, mx;
  373. #if ! JUCE_USE_ARM_NEON
  374. if (isAligned (src))
  375. {
  376. mn = Mode::loadA (src);
  377. mx = mn;
  378. while (--numLongOps > 0)
  379. {
  380. src += Mode::numParallel;
  381. const ParallelType v = Mode::loadA (src);
  382. mn = Mode::min (mn, v);
  383. mx = Mode::max (mx, v);
  384. }
  385. }
  386. else
  387. #endif
  388. {
  389. mn = Mode::loadU (src);
  390. mx = mn;
  391. while (--numLongOps > 0)
  392. {
  393. src += Mode::numParallel;
  394. const ParallelType v = Mode::loadU (src);
  395. mn = Mode::min (mn, v);
  396. mx = Mode::max (mx, v);
  397. }
  398. }
  399. Range<Type> result (Mode::min (mn),
  400. Mode::max (mx));
  401. num &= (Mode::numParallel - 1);
  402. src += Mode::numParallel;
  403. for (auto i = (decltype (num)) 0; i < num; ++i)
  404. result = result.getUnionWith (src[i]);
  405. return result;
  406. }
  407. return Range<Type>::findMinAndMax (src, num);
  408. }
  409. };
  410. #endif
  411. //==============================================================================
  412. namespace
  413. {
  414. template <typename Size>
  415. void clear (float* dest, Size num) noexcept
  416. {
  417. #if JUCE_USE_VDSP_FRAMEWORK
  418. vDSP_vclr (dest, 1, (vDSP_Length) num);
  419. #else
  420. zeromem (dest, (size_t) num * sizeof (float));
  421. #endif
  422. }
  423. template <typename Size>
  424. void clear (double* dest, Size num) noexcept
  425. {
  426. #if JUCE_USE_VDSP_FRAMEWORK
  427. vDSP_vclrD (dest, 1, (vDSP_Length) num);
  428. #else
  429. zeromem (dest, (size_t) num * sizeof (double));
  430. #endif
  431. }
  432. template <typename Size>
  433. void fill (float* dest, float valueToFill, Size num) noexcept
  434. {
  435. #if JUCE_USE_VDSP_FRAMEWORK
  436. vDSP_vfill (&valueToFill, dest, 1, (vDSP_Length) num);
  437. #else
  438. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill,
  439. val,
  440. JUCE_LOAD_NONE,
  441. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  442. #endif
  443. }
  444. template <typename Size>
  445. void fill (double* dest, double valueToFill, Size num) noexcept
  446. {
  447. #if JUCE_USE_VDSP_FRAMEWORK
  448. vDSP_vfillD (&valueToFill, dest, 1, (vDSP_Length) num);
  449. #else
  450. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill,
  451. val,
  452. JUCE_LOAD_NONE,
  453. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  454. #endif
  455. }
  456. template <typename Size>
  457. void copyWithMultiply (float* dest, const float* src, float multiplier, Size num) noexcept
  458. {
  459. #if JUCE_USE_VDSP_FRAMEWORK
  460. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  461. #else
  462. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  463. Mode::mul (mult, s),
  464. JUCE_LOAD_SRC,
  465. JUCE_INCREMENT_SRC_DEST,
  466. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  467. #endif
  468. }
  469. template <typename Size>
  470. void copyWithMultiply (double* dest, const double* src, double multiplier, Size num) noexcept
  471. {
  472. #if JUCE_USE_VDSP_FRAMEWORK
  473. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  474. #else
  475. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  476. Mode::mul (mult, s),
  477. JUCE_LOAD_SRC,
  478. JUCE_INCREMENT_SRC_DEST,
  479. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  480. #endif
  481. }
  482. template <typename Size>
  483. void add (float* dest, float amount, Size num) noexcept
  484. {
  485. #if JUCE_USE_VDSP_FRAMEWORK
  486. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  487. #else
  488. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount,
  489. Mode::add (d, amountToAdd),
  490. JUCE_LOAD_DEST,
  491. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  492. #endif
  493. }
  494. template <typename Size>
  495. void add (double* dest, double amount, Size num) noexcept
  496. {
  497. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount,
  498. Mode::add (d, amountToAdd),
  499. JUCE_LOAD_DEST,
  500. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  501. }
  502. template <typename Size>
  503. void add (float* dest, const float* src, float amount, Size num) noexcept
  504. {
  505. #if JUCE_USE_VDSP_FRAMEWORK
  506. vDSP_vsadd (src, 1, &amount, dest, 1, (vDSP_Length) num);
  507. #else
  508. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount,
  509. Mode::add (am, s),
  510. JUCE_LOAD_SRC,
  511. JUCE_INCREMENT_SRC_DEST,
  512. const Mode::ParallelType am = Mode::load1 (amount);)
  513. #endif
  514. }
  515. template <typename Size>
  516. void add (double* dest, const double* src, double amount, Size num) noexcept
  517. {
  518. #if JUCE_USE_VDSP_FRAMEWORK
  519. vDSP_vsaddD (src, 1, &amount, dest, 1, (vDSP_Length) num);
  520. #else
  521. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount,
  522. Mode::add (am, s),
  523. JUCE_LOAD_SRC,
  524. JUCE_INCREMENT_SRC_DEST,
  525. const Mode::ParallelType am = Mode::load1 (amount);)
  526. #endif
  527. }
  528. template <typename Size>
  529. void add (float* dest, const float* src, Size num) noexcept
  530. {
  531. #if JUCE_USE_VDSP_FRAMEWORK
  532. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  533. #else
  534. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i],
  535. Mode::add (d, s),
  536. JUCE_LOAD_SRC_DEST,
  537. JUCE_INCREMENT_SRC_DEST, )
  538. #endif
  539. }
  540. template <typename Size>
  541. void add (double* dest, const double* src, Size num) noexcept
  542. {
  543. #if JUCE_USE_VDSP_FRAMEWORK
  544. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  545. #else
  546. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i],
  547. Mode::add (d, s),
  548. JUCE_LOAD_SRC_DEST,
  549. JUCE_INCREMENT_SRC_DEST, )
  550. #endif
  551. }
  552. template <typename Size>
  553. void add (float* dest, const float* src1, const float* src2, Size num) noexcept
  554. {
  555. #if JUCE_USE_VDSP_FRAMEWORK
  556. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  557. #else
  558. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i],
  559. Mode::add (s1, s2),
  560. JUCE_LOAD_SRC1_SRC2,
  561. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  562. #endif
  563. }
  564. template <typename Size>
  565. void add (double* dest, const double* src1, const double* src2, Size num) noexcept
  566. {
  567. #if JUCE_USE_VDSP_FRAMEWORK
  568. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  569. #else
  570. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i],
  571. Mode::add (s1, s2),
  572. JUCE_LOAD_SRC1_SRC2,
  573. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  574. #endif
  575. }
  576. template <typename Size>
  577. void subtract (float* dest, const float* src, Size num) noexcept
  578. {
  579. #if JUCE_USE_VDSP_FRAMEWORK
  580. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  581. #else
  582. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i],
  583. Mode::sub (d, s),
  584. JUCE_LOAD_SRC_DEST,
  585. JUCE_INCREMENT_SRC_DEST, )
  586. #endif
  587. }
  588. template <typename Size>
  589. void subtract (double* dest, const double* src, Size num) noexcept
  590. {
  591. #if JUCE_USE_VDSP_FRAMEWORK
  592. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  593. #else
  594. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i],
  595. Mode::sub (d, s),
  596. JUCE_LOAD_SRC_DEST,
  597. JUCE_INCREMENT_SRC_DEST, )
  598. #endif
  599. }
  600. template <typename Size>
  601. void subtract (float* dest, const float* src1, const float* src2, Size num) noexcept
  602. {
  603. #if JUCE_USE_VDSP_FRAMEWORK
  604. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  605. #else
  606. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i],
  607. Mode::sub (s1, s2),
  608. JUCE_LOAD_SRC1_SRC2,
  609. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  610. #endif
  611. }
  612. template <typename Size>
  613. void subtract (double* dest, const double* src1, const double* src2, Size num) noexcept
  614. {
  615. #if JUCE_USE_VDSP_FRAMEWORK
  616. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  617. #else
  618. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i],
  619. Mode::sub (s1, s2),
  620. JUCE_LOAD_SRC1_SRC2,
  621. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  622. #endif
  623. }
  624. template <typename Size>
  625. void addWithMultiply (float* dest, const float* src, float multiplier, Size num) noexcept
  626. {
  627. #if JUCE_USE_VDSP_FRAMEWORK
  628. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  629. #else
  630. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  631. Mode::add (d, Mode::mul (mult, s)),
  632. JUCE_LOAD_SRC_DEST,
  633. JUCE_INCREMENT_SRC_DEST,
  634. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  635. #endif
  636. }
  637. template <typename Size>
  638. void addWithMultiply (double* dest, const double* src, double multiplier, Size num) noexcept
  639. {
  640. #if JUCE_USE_VDSP_FRAMEWORK
  641. vDSP_vsmaD (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  642. #else
  643. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  644. Mode::add (d, Mode::mul (mult, s)),
  645. JUCE_LOAD_SRC_DEST,
  646. JUCE_INCREMENT_SRC_DEST,
  647. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  648. #endif
  649. }
  650. template <typename Size>
  651. void addWithMultiply (float* dest, const float* src1, const float* src2, Size num) noexcept
  652. {
  653. #if JUCE_USE_VDSP_FRAMEWORK
  654. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  655. #else
  656. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i],
  657. Mode::add (d, Mode::mul (s1, s2)),
  658. JUCE_LOAD_SRC1_SRC2_DEST,
  659. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  660. #endif
  661. }
  // Double overload with two sources: dest[i] += src1[i] * src2[i] for num elements.
  // Uses vDSP_vmaD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
  // JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro (defined outside this excerpt).
  662. template <typename Size>
  663. void addWithMultiply (double* dest, const double* src1, const double* src2, Size num) noexcept
  664. {
  665. #if JUCE_USE_VDSP_FRAMEWORK
  666. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  667. #else
  668. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= 0, 0)
  669. #endif
  673. }
  // Float overload: dest[i] -= src[i] * multiplier for num elements.
  // There is no vDSP branch here; the work always goes through
  // JUCE_PERFORM_VEC_OP_SRC_DEST with the multiplier broadcast via Mode::load1.
  674. template <typename Size>
  675. void subtractWithMultiply (float* dest, const float* src, float multiplier, Size num) noexcept
  676. {
  677. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier,
  678. Mode::sub (d, Mode::mul (mult, s)),
  679. JUCE_LOAD_SRC_DEST,
  680. JUCE_INCREMENT_SRC_DEST,
  681. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  682. }
  // Double overload: dest[i] -= src[i] * multiplier for num elements.
  // Always goes through JUCE_PERFORM_VEC_OP_SRC_DEST (no vDSP branch).
  683. template <typename Size>
  684. void subtractWithMultiply (double* dest, const double* src, double multiplier, Size num) noexcept
  685. {
  686. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier,
  687. Mode::sub (d, Mode::mul (mult, s)),
  688. JUCE_LOAD_SRC_DEST,
  689. JUCE_INCREMENT_SRC_DEST,
  690. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  691. }
  // Float overload with two sources: dest[i] -= src1[i] * src2[i] for num elements.
  692. template <typename Size>
  693. void subtractWithMultiply (float* dest, const float* src1, const float* src2, Size num) noexcept
  694. {
  695. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i],
  696. Mode::sub (d, Mode::mul (s1, s2)),
  697. JUCE_LOAD_SRC1_SRC2_DEST,
  698. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  699. }
  // Double overload with two sources: dest[i] -= src1[i] * src2[i] for num elements.
  700. template <typename Size>
  701. void subtractWithMultiply (double* dest, const double* src1, const double* src2, Size num) noexcept
  702. {
  703. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i],
  704. Mode::sub (d, Mode::mul (s1, s2)),
  705. JUCE_LOAD_SRC1_SRC2_DEST,
  706. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  707. }
  // In-place element-wise product (float): dest[i] *= src[i] for num elements.
  // vDSP_vmul is used when JUCE_USE_VDSP_FRAMEWORK is set, with dest as both an
  // input and the output.
  708. template <typename Size>
  709. void multiply (float* dest, const float* src, Size num) noexcept
  710. {
  711. #if JUCE_USE_VDSP_FRAMEWORK
  712. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  713. #else
  714. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i],
  715. Mode::mul (d, s),
  716. JUCE_LOAD_SRC_DEST,
  717. JUCE_INCREMENT_SRC_DEST, )
  718. #endif
  719. }
  // In-place element-wise product (double): dest[i] *= src[i] for num elements.
  // vDSP_vmulD is used when JUCE_USE_VDSP_FRAMEWORK is set.
  720. template <typename Size>
  721. void multiply (double* dest, const double* src, Size num) noexcept
  722. {
  723. #if JUCE_USE_VDSP_FRAMEWORK
  724. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  725. #else
  726. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i],
  727. Mode::mul (d, s),
  728. JUCE_LOAD_SRC_DEST,
  729. JUCE_INCREMENT_SRC_DEST, )
  730. #endif
  731. }
  // Element-wise product of two sources (float): dest[i] = src1[i] * src2[i].
  // dest is only written, so the non-vDSP branch uses JUCE_LOAD_SRC1_SRC2
  // (no dest load is named).
  732. template <typename Size>
  733. void multiply (float* dest, const float* src1, const float* src2, Size num) noexcept
  734. {
  735. #if JUCE_USE_VDSP_FRAMEWORK
  736. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  737. #else
  738. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i],
  739. Mode::mul (s1, s2),
  740. JUCE_LOAD_SRC1_SRC2,
  741. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  742. #endif
  743. }
  // Element-wise product of two sources (double): dest[i] = src1[i] * src2[i].
  744. template <typename Size>
  745. void multiply (double* dest, const double* src1, const double* src2, Size num) noexcept
  746. {
  747. #if JUCE_USE_VDSP_FRAMEWORK
  748. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  749. #else
  750. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i],
  751. Mode::mul (s1, s2),
  752. JUCE_LOAD_SRC1_SRC2,
  753. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  754. #endif
  755. }
  // In-place scaling (float): dest[i] *= multiplier for num elements.
  // The non-vDSP branch broadcasts the multiplier once with Mode::load1.
  756. template <typename Size>
  757. void multiply (float* dest, float multiplier, Size num) noexcept
  758. {
  759. #if JUCE_USE_VDSP_FRAMEWORK
  760. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  761. #else
  762. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier,
  763. Mode::mul (d, mult),
  764. JUCE_LOAD_DEST,
  765. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  766. #endif
  767. }
  // In-place scaling (double): dest[i] *= multiplier for num elements.
  768. template <typename Size>
  769. void multiply (double* dest, double multiplier, Size num) noexcept
  770. {
  771. #if JUCE_USE_VDSP_FRAMEWORK
  772. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  773. #else
  774. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier,
  775. Mode::mul (d, mult),
  776. JUCE_LOAD_DEST,
  777. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  778. #endif
  779. }
  // Scaled copy (float): dest[i] = src[i] * multiplier for num elements.
  // No vDSP branch; always goes through JUCE_PERFORM_VEC_OP_SRC_DEST.
  780. template <typename Size>
  781. void multiply (float* dest, const float* src, float multiplier, Size num) noexcept
  782. {
  783. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  784. Mode::mul (mult, s),
  785. JUCE_LOAD_SRC,
  786. JUCE_INCREMENT_SRC_DEST,
  787. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  788. }
  // Scaled copy (double): dest[i] = src[i] * multiplier for num elements.
  789. template <typename Size>
  790. void multiply (double* dest, const double* src, double multiplier, Size num) noexcept
  791. {
  792. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  793. Mode::mul (mult, s),
  794. JUCE_LOAD_SRC,
  795. JUCE_INCREMENT_SRC_DEST,
  796. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  797. }
  // Writes the negation of src into dest (float). Uses vDSP_vneg when available,
  // otherwise calls copyWithMultiply with a factor of -1.0f (that helper is
  // defined earlier in the file, outside this excerpt).
  798. template <typename Size>
  799. void negate (float* dest, const float* src, Size num) noexcept
  800. {
  801. #if JUCE_USE_VDSP_FRAMEWORK
  802. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  803. #else
  804. copyWithMultiply (dest, src, -1.0f, num);
  805. #endif
  806. }
  // Writes the negation of src into dest (double). Uses vDSP_vnegD when available,
  // otherwise calls copyWithMultiply with a factor of -1.
  // NOTE(review): the factor is spelled as the float literal -1.0f although the
  // buffers are double; presumably this resolves to the double copyWithMultiply
  // overload (not visible in this excerpt) and converts exactly - confirm.
  807. template <typename Size>
  808. void negate (double* dest, const double* src, Size num) noexcept
  809. {
  810. #if JUCE_USE_VDSP_FRAMEWORK
  811. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  812. #else
  813. copyWithMultiply (dest, src, -1.0f, num);
  814. #endif
  815. }
  // Absolute value (float): dest[i] = |src[i]| for num elements.
  // The non-vDSP branch builds a mask with every bit set except the top (sign)
  // bit and ANDs it with each source vector; the scalar form uses std::abs.
  // signMask is [[maybe_unused]], presumably because some macro expansions only
  // emit the scalar loop - confirm against the macro definition.
  816. template <typename Size>
  817. void abs (float* dest, const float* src, Size num) noexcept
  818. {
  819. #if JUCE_USE_VDSP_FRAMEWORK
  820. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  821. #else
  822. [[maybe_unused]] FloatVectorHelpers::signMask32 signMask;
  823. signMask.i = 0x7fffffffUL;
  824. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]),
  825. Mode::bit_and (s, mask),
  826. JUCE_LOAD_SRC,
  827. JUCE_INCREMENT_SRC_DEST,
  828. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  829. #endif
  830. }
  // Absolute value (double): dest[i] = |src[i]| for num elements.
  // Same approach as the float overload, with a 64-bit mask that clears only
  // the top (sign) bit.
  831. template <typename Size>
  832. void abs (double* dest, const double* src, Size num) noexcept
  833. {
  834. #if JUCE_USE_VDSP_FRAMEWORK
  835. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  836. #else
  837. [[maybe_unused]] FloatVectorHelpers::signMask64 signMask;
  838. signMask.i = 0x7fffffffffffffffULL;
  839. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]),
  840. Mode::bit_and (s, mask),
  841. JUCE_LOAD_SRC,
  842. JUCE_INCREMENT_SRC_DEST,
  843. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  844. #endif
  845. }
  // Minimum against a constant (float): dest[i] = jmin (src[i], comp).
  // comp is broadcast once with Mode::load1 for the vector expression.
  846. template <typename Size>
  847. void min (float* dest, const float* src, float comp, Size num) noexcept
  848. {
  849. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp),
  850. Mode::min (s, cmp),
  851. JUCE_LOAD_SRC,
  852. JUCE_INCREMENT_SRC_DEST,
  853. const Mode::ParallelType cmp = Mode::load1 (comp);)
  854. }
  // Minimum against a constant (double): dest[i] = jmin (src[i], comp).
  855. template <typename Size>
  856. void min (double* dest, const double* src, double comp, Size num) noexcept
  857. {
  858. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp),
  859. Mode::min (s, cmp),
  860. JUCE_LOAD_SRC,
  861. JUCE_INCREMENT_SRC_DEST,
  862. const Mode::ParallelType cmp = Mode::load1 (comp);)
  863. }
  // Element-wise minimum of two sources (float): dest[i] = jmin (src1[i], src2[i]).
  // Uses vDSP_vmin when JUCE_USE_VDSP_FRAMEWORK is set.
  864. template <typename Size>
  865. void min (float* dest, const float* src1, const float* src2, Size num) noexcept
  866. {
  867. #if JUCE_USE_VDSP_FRAMEWORK
  868. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  869. #else
  870. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]),
  871. Mode::min (s1, s2),
  872. JUCE_LOAD_SRC1_SRC2,
  873. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  874. #endif
  875. }
  // Element-wise minimum of two sources (double): dest[i] = jmin (src1[i], src2[i]).
  // Uses vDSP_vminD when JUCE_USE_VDSP_FRAMEWORK is set.
  876. template <typename Size>
  877. void min (double* dest, const double* src1, const double* src2, Size num) noexcept
  878. {
  879. #if JUCE_USE_VDSP_FRAMEWORK
  880. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  881. #else
  882. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]),
  883. Mode::min (s1, s2),
  884. JUCE_LOAD_SRC1_SRC2,
  885. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  886. #endif
  887. }
  // Maximum against a constant (float): dest[i] = jmax (src[i], comp).
  888. template <typename Size>
  889. void max (float* dest, const float* src, float comp, Size num) noexcept
  890. {
  891. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp),
  892. Mode::max (s, cmp),
  893. JUCE_LOAD_SRC,
  894. JUCE_INCREMENT_SRC_DEST,
  895. const Mode::ParallelType cmp = Mode::load1 (comp);)
  896. }
  // Maximum against a constant (double): dest[i] = jmax (src[i], comp).
  897. template <typename Size>
  898. void max (double* dest, const double* src, double comp, Size num) noexcept
  899. {
  900. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp),
  901. Mode::max (s, cmp),
  902. JUCE_LOAD_SRC,
  903. JUCE_INCREMENT_SRC_DEST,
  904. const Mode::ParallelType cmp = Mode::load1 (comp);)
  905. }
  // Element-wise maximum of two sources (float): dest[i] = jmax (src1[i], src2[i]).
  // Uses vDSP_vmax when JUCE_USE_VDSP_FRAMEWORK is set.
  906. template <typename Size>
  907. void max (float* dest, const float* src1, const float* src2, Size num) noexcept
  908. {
  909. #if JUCE_USE_VDSP_FRAMEWORK
  910. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  911. #else
  912. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]),
  913. Mode::max (s1, s2),
  914. JUCE_LOAD_SRC1_SRC2,
  915. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  916. #endif
  917. }
  // Element-wise maximum of two sources (double): dest[i] = jmax (src1[i], src2[i]).
  // Uses vDSP_vmaxD when JUCE_USE_VDSP_FRAMEWORK is set.
  918. template <typename Size>
  919. void max (double* dest, const double* src1, const double* src2, Size num) noexcept
  920. {
  921. #if JUCE_USE_VDSP_FRAMEWORK
  922. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  923. #else
  924. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]),
  925. Mode::max (s1, s2),
  926. JUCE_LOAD_SRC1_SRC2,
  927. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  928. #endif
  929. }
  // Clamps each float in src to [low, high] and writes it to dest:
  // dest[i] = jmax (jmin (src[i], high), low). Asserts high >= low.
  // Uses vDSP_vclip when available; otherwise both bounds are broadcast once
  // with Mode::load1 for the vector expression.
  930. template <typename Size>
  931. void clip (float* dest, const float* src, float low, float high, Size num) noexcept
  932. {
  933. jassert (high >= low);
  934. #if JUCE_USE_VDSP_FRAMEWORK
  935. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  936. #else
  937. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low),
  938. Mode::max (Mode::min (s, hi), lo),
  939. JUCE_LOAD_SRC,
  940. JUCE_INCREMENT_SRC_DEST,
  941. const Mode::ParallelType lo = Mode::load1 (low);
  942. const Mode::ParallelType hi = Mode::load1 (high);)
  943. #endif
  944. }
  // Clamps each double in src to [low, high] and writes it to dest:
  // dest[i] = jmax (jmin (src[i], high), low). Asserts high >= low.
  // Uses vDSP_vclipD when JUCE_USE_VDSP_FRAMEWORK is set.
  945. template <typename Size>
  946. void clip (double* dest, const double* src, double low, double high, Size num) noexcept
  947. {
  948. jassert (high >= low);
  949. #if JUCE_USE_VDSP_FRAMEWORK
  950. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  951. #else
  952. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low),
  953. Mode::max (Mode::min (s, hi), lo),
  954. JUCE_LOAD_SRC,
  955. JUCE_INCREMENT_SRC_DEST,
  956. const Mode::ParallelType lo = Mode::load1 (low);
  957. const Mode::ParallelType hi = Mode::load1 (high);)
  958. #endif
  959. }
  // Returns the range spanned by num floats at src. With SSE or NEON enabled it
  // delegates to MinMax<BasicOps32>::findMinAndMax; otherwise it falls back to
  // Range<float>::findMinAndMax.
  960. template <typename Size>
  961. Range<float> findMinAndMax (const float* src, Size num) noexcept
  962. {
  963. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  964. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  965. #else
  966. return Range<float>::findMinAndMax (src, num);
  967. #endif
  968. }
  // Returns the range spanned by num doubles at src. With SSE or NEON enabled it
  // delegates to MinMax<BasicOps64>::findMinAndMax; otherwise it falls back to
  // Range<double>::findMinAndMax.
  969. template <typename Size>
  970. Range<double> findMinAndMax (const double* src, Size num) noexcept
  971. {
  972. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  973. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  974. #else
  975. return Range<double>::findMinAndMax (src, num);
  976. #endif
  977. }
  // Returns the smallest of num floats at src. The SIMD branch calls
  // MinMax<BasicOps32>::findMinOrMax with its flag set to true (presumably
  // "is minimum" - the parameter is declared outside this excerpt); the
  // fallback is juce::findMinimum.
  978. template <typename Size>
  979. float findMinimum (const float* src, Size num) noexcept
  980. {
  981. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  982. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  983. #else
  984. return juce::findMinimum (src, num);
  985. #endif
  986. }
  // Returns the smallest of num doubles at src. The SIMD branch calls
  // MinMax<BasicOps64>::findMinOrMax with its flag set to true; the fallback
  // is juce::findMinimum.
  987. template <typename Size>
  988. double findMinimum (const double* src, Size num) noexcept
  989. {
  990. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  991. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  992. #else
  993. return juce::findMinimum (src, num);
  994. #endif
  995. }
  // Returns the largest of num floats at src. The SIMD branch calls
  // MinMax<BasicOps32>::findMinOrMax with its flag set to false; the fallback
  // is juce::findMaximum.
  996. template <typename Size>
  997. float findMaximum (const float* src, Size num) noexcept
  998. {
  999. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  1000. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  1001. #else
  1002. return juce::findMaximum (src, num);
  1003. #endif
  1004. }
  // Returns the largest of num doubles at src. The SIMD branch calls
  // MinMax<BasicOps64>::findMinOrMax with its flag set to false; the fallback
  // is juce::findMaximum.
  1005. template <typename Size>
  1006. double findMaximum (const double* src, Size num) noexcept
  1007. {
  1008. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  1009. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  1010. #else
  1011. return juce::findMaximum (src, num);
  1012. #endif
  1013. }
  // Converts num ints to floats and scales them: dest[i] = (float) src[i] * multiplier.
  // Both branches pass JUCE_LOAD_NONE because the vector expression loads from
  // src itself: the NEON branch via vld1q_s32 + vcvtq_f32_s32 + vmulq_n_f32,
  // the other branch via _mm_loadu_si128 + _mm_cvtepi32_ps.
  // NOTE(review): the #else branch names SSE intrinsics; presumably the macro
  // discards that expression when no SIMD mode is enabled - confirm against the
  // macro definition, which is outside this excerpt.
  1014. template <typename Size>
  1015. void convertFixedToFloat (float* dest, const int* src, float multiplier, Size num) noexcept
  1016. {
  1017. #if JUCE_USE_ARM_NEON
  1018. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
  1019. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  1020. JUCE_LOAD_NONE,
  1021. JUCE_INCREMENT_SRC_DEST, )
  1022. #else
  1023. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
  1024. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast<const __m128i*> (src)))),
  1025. JUCE_LOAD_NONE,
  1026. JUCE_INCREMENT_SRC_DEST,
  1027. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  1028. #endif
  1029. }
  1030. } // namespace
  1031. } // namespace FloatVectorHelpers
  1032. //==============================================================================
  // Public entry point: forwards dest and numValues unchanged to FloatVectorHelpers::clear.
  1033. template <typename FloatType, typename CountType>
  1034. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::clear (FloatType* dest,
  1035. CountType numValues) noexcept
  1036. {
  1037. FloatVectorHelpers::clear (dest, numValues);
  1038. }
  // Public entry point: forwards to FloatVectorHelpers::fill, which receives
  // dest, valueToFill and numValues in the same order.
  1039. template <typename FloatType, typename CountType>
  1040. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::fill (FloatType* dest,
  1041. FloatType valueToFill,
  1042. CountType numValues) noexcept
  1043. {
  1044. FloatVectorHelpers::fill (dest, valueToFill, numValues);
  1045. }
  // Copies numValues elements from src to dest with a single memcpy of
  // numValues * sizeof (FloatType) bytes. As with any memcpy, the two ranges
  // must not overlap.
  1046. template <typename FloatType, typename CountType>
  1047. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::copy (FloatType* dest,
  1048. const FloatType* src,
  1049. CountType numValues) noexcept
  1050. {
  1051. memcpy (dest, src, (size_t) numValues * sizeof (FloatType));
  1052. }
  // Public entry point: forwards all four arguments to FloatVectorHelpers::copyWithMultiply.
  1053. template <typename FloatType, typename CountType>
  1054. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::copyWithMultiply (FloatType* dest,
  1055. const FloatType* src,
  1056. FloatType multiplier,
  1057. CountType numValues) noexcept
  1058. {
  1059. FloatVectorHelpers::copyWithMultiply (dest, src, multiplier, numValues);
  1060. }
  // Scalar-add overload: forwards dest, amountToAdd and numValues to FloatVectorHelpers::add.
  1061. template <typename FloatType, typename CountType>
  1062. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::add (FloatType* dest,
  1063. FloatType amountToAdd,
  1064. CountType numValues) noexcept
  1065. {
  1066. FloatVectorHelpers::add (dest, amountToAdd, numValues);
  1067. }
  // Source-plus-scalar overload: forwards dest, src, amount and numValues to
  // the matching FloatVectorHelpers::add overload.
  1068. template <typename FloatType, typename CountType>
  1069. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::add (FloatType* dest,
  1070. const FloatType* src,
  1071. FloatType amount,
  1072. CountType numValues) noexcept
  1073. {
  1074. FloatVectorHelpers::add (dest, src, amount, numValues);
  1075. }
  // Single-source overload: forwards dest, src and numValues to FloatVectorHelpers::add.
  1076. template <typename FloatType, typename CountType>
  1077. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::add (FloatType* dest,
  1078. const FloatType* src,
  1079. CountType numValues) noexcept
  1080. {
  1081. FloatVectorHelpers::add (dest, src, numValues);
  1082. }
  // Two-source overload: forwards dest, src1, src2 and num to FloatVectorHelpers::add.
  1083. template <typename FloatType, typename CountType>
  1084. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::add (FloatType* dest,
  1085. const FloatType* src1,
  1086. const FloatType* src2,
  1087. CountType num) noexcept
  1088. {
  1089. FloatVectorHelpers::add (dest, src1, src2, num);
  1090. }
  // Single-source overload: forwards dest, src and numValues to FloatVectorHelpers::subtract.
  1091. template <typename FloatType, typename CountType>
  1092. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::subtract (FloatType* dest,
  1093. const FloatType* src,
  1094. CountType numValues) noexcept
  1095. {
  1096. FloatVectorHelpers::subtract (dest, src, numValues);
  1097. }
  // Two-source overload: forwards dest, src1, src2 and num to FloatVectorHelpers::subtract.
  1098. template <typename FloatType, typename CountType>
  1099. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::subtract (FloatType* dest,
  1100. const FloatType* src1,
  1101. const FloatType* src2,
  1102. CountType num) noexcept
  1103. {
  1104. FloatVectorHelpers::subtract (dest, src1, src2, num);
  1105. }
  // Scalar-multiplier overload: forwards to FloatVectorHelpers::addWithMultiply
  // (dest[i] += src[i] * multiplier in the helper).
  1106. template <typename FloatType, typename CountType>
  1107. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::addWithMultiply (FloatType* dest,
  1108. const FloatType* src,
  1109. FloatType multiplier,
  1110. CountType numValues) noexcept
  1111. {
  1112. FloatVectorHelpers::addWithMultiply (dest, src, multiplier, numValues);
  1113. }
  // Two-source overload: forwards to FloatVectorHelpers::addWithMultiply
  // (dest[i] += src1[i] * src2[i] in the helper).
  1114. template <typename FloatType, typename CountType>
  1115. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::addWithMultiply (FloatType* dest,
  1116. const FloatType* src1,
  1117. const FloatType* src2,
  1118. CountType num) noexcept
  1119. {
  1120. FloatVectorHelpers::addWithMultiply (dest, src1, src2, num);
  1121. }
  // Scalar-multiplier overload: forwards to FloatVectorHelpers::subtractWithMultiply
  // (dest[i] -= src[i] * multiplier in the helper).
  1122. template <typename FloatType, typename CountType>
  1123. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::subtractWithMultiply (FloatType* dest,
  1124. const FloatType* src,
  1125. FloatType multiplier,
  1126. CountType numValues) noexcept
  1127. {
  1128. FloatVectorHelpers::subtractWithMultiply (dest, src, multiplier, numValues);
  1129. }
  // Two-source overload: forwards to FloatVectorHelpers::subtractWithMultiply
  // (dest[i] -= src1[i] * src2[i] in the helper).
  1130. template <typename FloatType, typename CountType>
  1131. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::subtractWithMultiply (FloatType* dest,
  1132. const FloatType* src1,
  1133. const FloatType* src2,
  1134. CountType num) noexcept
  1135. {
  1136. FloatVectorHelpers::subtractWithMultiply (dest, src1, src2, num);
  1137. }
  // In-place overload: forwards to FloatVectorHelpers::multiply (dest[i] *= src[i] in the helper).
  1138. template <typename FloatType, typename CountType>
  1139. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::multiply (FloatType* dest,
  1140. const FloatType* src,
  1141. CountType numValues) noexcept
  1142. {
  1143. FloatVectorHelpers::multiply (dest, src, numValues);
  1144. }
  // Two-source overload: forwards to FloatVectorHelpers::multiply
  // (dest[i] = src1[i] * src2[i] in the helper).
  1145. template <typename FloatType, typename CountType>
  1146. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::multiply (FloatType* dest,
  1147. const FloatType* src1,
  1148. const FloatType* src2,
  1149. CountType numValues) noexcept
  1150. {
  1151. FloatVectorHelpers::multiply (dest, src1, src2, numValues);
  1152. }
  // In-place scaling overload: forwards to FloatVectorHelpers::multiply
  // (dest[i] *= multiplier in the helper).
  1153. template <typename FloatType, typename CountType>
  1154. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::multiply (FloatType* dest,
  1155. FloatType multiplier,
  1156. CountType numValues) noexcept
  1157. {
  1158. FloatVectorHelpers::multiply (dest, multiplier, numValues);
  1159. }
  // Scaled-copy overload: forwards to FloatVectorHelpers::multiply
  // (dest[i] = src[i] * multiplier in the helper).
  1160. template <typename FloatType, typename CountType>
  1161. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::multiply (FloatType* dest,
  1162. const FloatType* src,
  1163. FloatType multiplier,
  1164. CountType num) noexcept
  1165. {
  1166. FloatVectorHelpers::multiply (dest, src, multiplier, num);
  1167. }
  // Public entry point: forwards dest, src and numValues to FloatVectorHelpers::negate.
  1168. template <typename FloatType, typename CountType>
  1169. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::negate (FloatType* dest,
  1170. const FloatType* src,
  1171. CountType numValues) noexcept
  1172. {
  1173. FloatVectorHelpers::negate (dest, src, numValues);
  1174. }
  // Public entry point: forwards dest, src and numValues to FloatVectorHelpers::abs.
  1175. template <typename FloatType, typename CountType>
  1176. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::abs (FloatType* dest,
  1177. const FloatType* src,
  1178. CountType numValues) noexcept
  1179. {
  1180. FloatVectorHelpers::abs (dest, src, numValues);
  1181. }
  // Constant-comparison overload: forwards to FloatVectorHelpers::min
  // (dest[i] = jmin (src[i], comp) in the helper).
  1182. template <typename FloatType, typename CountType>
  1183. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::min (FloatType* dest,
  1184. const FloatType* src,
  1185. FloatType comp,
  1186. CountType num) noexcept
  1187. {
  1188. FloatVectorHelpers::min (dest, src, comp, num);
  1189. }
  // Two-source overload: forwards to FloatVectorHelpers::min
  // (dest[i] = jmin (src1[i], src2[i]) in the helper).
  1190. template <typename FloatType, typename CountType>
  1191. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::min (FloatType* dest,
  1192. const FloatType* src1,
  1193. const FloatType* src2,
  1194. CountType num) noexcept
  1195. {
  1196. FloatVectorHelpers::min (dest, src1, src2, num);
  1197. }
  // Constant-comparison overload: forwards to FloatVectorHelpers::max
  // (dest[i] = jmax (src[i], comp) in the helper).
  1198. template <typename FloatType, typename CountType>
  1199. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::max (FloatType* dest,
  1200. const FloatType* src,
  1201. FloatType comp,
  1202. CountType num) noexcept
  1203. {
  1204. FloatVectorHelpers::max (dest, src, comp, num);
  1205. }
  // Two-source overload: forwards to FloatVectorHelpers::max
  // (dest[i] = jmax (src1[i], src2[i]) in the helper).
  1206. template <typename FloatType, typename CountType>
  1207. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::max (FloatType* dest,
  1208. const FloatType* src1,
  1209. const FloatType* src2,
  1210. CountType num) noexcept
  1211. {
  1212. FloatVectorHelpers::max (dest, src1, src2, num);
  1213. }
  // Public entry point: forwards to FloatVectorHelpers::clip, which asserts
  // high >= low and clamps each src element into [low, high].
  1214. template <typename FloatType, typename CountType>
  1215. void JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::clip (FloatType* dest,
  1216. const FloatType* src,
  1217. FloatType low,
  1218. FloatType high,
  1219. CountType num) noexcept
  1220. {
  1221. FloatVectorHelpers::clip (dest, src, low, high, num);
  1222. }
  // Public entry point: returns the Range produced by FloatVectorHelpers::findMinAndMax.
  1223. template <typename FloatType, typename CountType>
  1224. Range<FloatType> JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::findMinAndMax (const FloatType* src,
  1225. CountType numValues) noexcept
  1226. {
  1227. return FloatVectorHelpers::findMinAndMax (src, numValues);
  1228. }
  // Public entry point: returns the result of FloatVectorHelpers::findMinimum.
  1229. template <typename FloatType, typename CountType>
  1230. FloatType JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::findMinimum (const FloatType* src,
  1231. CountType numValues) noexcept
  1232. {
  1233. return FloatVectorHelpers::findMinimum (src, numValues);
  1234. }
  // Public entry point: returns the result of FloatVectorHelpers::findMaximum.
  1235. template <typename FloatType, typename CountType>
  1236. FloatType JUCE_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::findMaximum (const FloatType* src,
  1237. CountType numValues) noexcept
  1238. {
  1239. return FloatVectorHelpers::findMaximum (src, numValues);
  1240. }
  // Explicit instantiations: the member templates above are emitted here for
  // float/double element types combined with int/size_t counts.
  1241. template struct FloatVectorOperationsBase<float, int>;
  1242. template struct FloatVectorOperationsBase<float, size_t>;
  1243. template struct FloatVectorOperationsBase<double, int>;
  1244. template struct FloatVectorOperationsBase<double, size_t>;
  // size_t-count overload: forwards to FloatVectorHelpers::convertFixedToFloat
  // (dest[i] = (float) src[i] * multiplier in the helper).
  1245. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept
  1246. {
  1247. FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num);
  1248. }
  // int-count overload: forwards to FloatVectorHelpers::convertFixedToFloat
  // (dest[i] = (float) src[i] * multiplier in the helper).
  1249. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  1250. {
  1251. FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num);
  1252. }
  // Reads the floating-point control/status word and returns it as intptr_t.
  //  - Intel with SSE: the value of _mm_getcsr().
  //  - ARM (64-bit or NEON) under MSVC: only the _MCW_DN bits of _control87.
  //  - ARM 64-bit otherwise: the fpcr register, read with inline asm.
  //  - ARM 32-bit NEON otherwise: the fpscr register, read with inline asm.
  // On any other configuration nothing is read and 0 is returned; if neither
  // JUCE_INTEL nor JUCE_ARM is defined, jassertfalse fires first.
  1253. intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
  1254. {
  1255. intptr_t fpsr = 0;
  1256. #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
  1257. fpsr = static_cast<intptr_t> (_mm_getcsr());
  1258. #elif (JUCE_64BIT && JUCE_ARM) || JUCE_USE_ARM_NEON
  1259. #if _MSC_VER
  1260. // _control87 returns static values for x86 bits that don't exist on arm
  1261. // to emulate x86 behaviour. We are only ever interested in de-normal bits
  1262. // so mask out only those.
  1263. fpsr = (intptr_t) (_control87 (0, 0) & _MCW_DN);
  1264. #else
  1265. #if JUCE_64BIT
  1266. asm volatile("mrs %0, fpcr"
  1267. : "=r"(fpsr));
  1268. #elif JUCE_USE_ARM_NEON
  1269. asm volatile("vmrs %0, fpscr"
  1270. : "=r"(fpsr));
  1271. #endif
  1272. #endif
  1273. #else
  1274. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  1275. jassertfalse; // No support for getting the floating point status register for your platform
  1276. #endif
  1277. #endif
  1278. return fpsr;
  1279. }
  // Writes fpsr to the floating-point control/status word; the counterpart of
  // getFpStatusRegister, with the same platform branches.
  //  - Intel with SSE: _mm_setcsr with fpsr narrowed to uint32_t.
  //  - ARM under MSVC: _control87 restricted to the _MCW_DN bits.
  //  - ARM 64-bit otherwise: writes fpcr; ARM 32-bit NEON: writes fpscr.
  // fpsr is [[maybe_unused]] because some configurations compile no branch that reads it.
  1280. void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister ([[maybe_unused]] intptr_t fpsr) noexcept
  1281. {
  1282. #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
  1283. // the volatile keyword here is needed to workaround a bug in AppleClang 13.0
  1284. // which aggressively optimises away the variable otherwise
  1285. volatile auto fpsr_w = static_cast<uint32_t> (fpsr);
  1286. _mm_setcsr (fpsr_w);
  1287. #elif (JUCE_64BIT && JUCE_ARM) || JUCE_USE_ARM_NEON
  1288. #if _MSC_VER
  1289. _control87 ((unsigned int) fpsr, _MCW_DN);
  1290. #else
  1291. #if JUCE_64BIT
  1292. asm volatile("msr fpcr, %0"
  1293. :
  1294. : "ri"(fpsr));
  1295. #elif JUCE_USE_ARM_NEON
  1296. asm volatile("vmsr fpscr, %0"
  1297. :
  1298. : "ri"(fpsr));
  1299. #endif
  1300. #endif
  1301. #else
  1302. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  1303. jassertfalse; // No support for setting the floating point status register for your platform
  1304. #endif
  1305. #endif
  1306. }
  // Turns flush-to-zero on or off with a read-modify-write of the FP status
  // register: the mask bits are cleared, then set again only if shouldEnable.
  // The mask is _MM_FLUSH_ZERO_MASK on SSE and bit 24 (FZ) on ARM.
  // On unsupported configurations this does nothing (asserting if neither
  // JUCE_INTEL nor JUCE_ARM is defined).
  1307. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode ([[maybe_unused]] bool shouldEnable) noexcept
  1308. {
  1309. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
  1310. #if JUCE_USE_SSE_INTRINSICS
  1311. intptr_t mask = _MM_FLUSH_ZERO_MASK;
  1312. #else /*JUCE_USE_ARM_NEON*/
  1313. intptr_t mask = (1 << 24 /* FZ */);
  1314. #endif
  1315. setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0));
  1316. #else
  1317. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  1318. jassertfalse; // No support for flush to zero mode on your platform
  1319. #endif
  1320. #endif
  1321. }
  // Disables (or re-enables) denormal handling with a read-modify-write of the
  // FP status register: the mask bits are cleared, then set again only if
  // shouldDisable. On SSE the mask is 0x8040 (presumably the MXCSR
  // flush-to-zero bit 0x8000 plus denormals-are-zero bit 0x0040 - confirm
  // against the Intel manual); on ARM it is bit 24 (FZ), the same bit that
  // enableFlushToZeroMode uses.
  1322. void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport ([[maybe_unused]] bool shouldDisable) noexcept
  1323. {
  1324. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
  1325. #if JUCE_USE_SSE_INTRINSICS
  1326. intptr_t mask = 0x8040;
  1327. #else /*JUCE_USE_ARM_NEON*/
  1328. intptr_t mask = (1 << 24 /* FZ */);
  1329. #endif
  1330. setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldDisable ? mask : 0));
  1331. #else
  1332. #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
  1333. jassertfalse; // No support for disable denormals mode on your platform
  1334. #endif
  1335. #endif
  1336. }
  // Returns true only when every bit of the denormal mask (0x8040 on SSE,
  // bit 24 on ARM) is set in the FP status register. Returns false on
  // configurations with neither SSE nor ARM support compiled in.
  1337. bool JUCE_CALLTYPE FloatVectorOperations::areDenormalsDisabled() noexcept
  1338. {
  1339. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
  1340. #if JUCE_USE_SSE_INTRINSICS
  1341. intptr_t mask = 0x8040;
  1342. #else /*JUCE_USE_ARM_NEON*/
  1343. intptr_t mask = (1 << 24 /* FZ */);
  1344. #endif
  1345. return ((getFpStatusRegister() & mask) == mask);
  1346. #else
  1347. return false;
  1348. #endif
  1349. }
  // Saves the current FP status register in the fpsr member, then writes it
  // back with the denormal mask bits (0x8040 on SSE, bit 24 on ARM) set.
  // Does nothing on configurations without SSE or ARM support.
  1350. ScopedNoDenormals::ScopedNoDenormals() noexcept
  1351. {
  1352. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
  1353. #if JUCE_USE_SSE_INTRINSICS
  1354. intptr_t mask = 0x8040;
  1355. #else /*JUCE_USE_ARM_NEON*/
  1356. intptr_t mask = (1 << 24 /* FZ */);
  1357. #endif
  1358. fpsr = FloatVectorOperations::getFpStatusRegister();
  1359. FloatVectorOperations::setFpStatusRegister (fpsr | mask);
  1360. #endif
  1361. }
  // Restores the FP status register value that the constructor saved in fpsr.
  1362. ScopedNoDenormals::~ScopedNoDenormals() noexcept
  1363. {
  1364. #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
  1365. FloatVectorOperations::setFpStatusRegister (fpsr);
  1366. #endif
  1367. }
  1368. //==============================================================================
  1369. //==============================================================================
  1370. #if JUCE_UNIT_TESTS
  1371. class FloatVectorOperationsTests : public UnitTest
  1372. {
  1373. public:
  // Constructs the test with the name "FloatVectorOperations" in the audio category.
  1374. FloatVectorOperationsTests()
  1375. : UnitTest ("FloatVectorOperations", UnitTestCategories::audio)
  1376. {}
  // Runs one randomised pass over the FloatVectorOperations API for ValueType
  // (the float overload of doConversionTest adds the fixed-to-float check).
  // All helpers are static; failures are reported through the UnitTest passed in.
  1377. template <typename ValueType>
  1378. struct TestRunner
  1379. {
  // Picks a random length (1..500 or 1..10), fills two buffers and checks each
  // operation against a scalar reference or a known constant. The steps build on
  // each other (2 -> 4 -> 8/16/32 -> 256 -> -256 -> 512 ...), so their order matters.
  1380. static void runTest (UnitTest& u, Random random)
  1381. {
  1382. const int range = random.nextBool() ? 500 : 10;
  1383. const int num = random.nextInt (range) + 1;
  1384. HeapBlock<ValueType> buffer1 (num + 16), buffer2 (num + 16);
  1385. HeapBlock<int> buffer3 (num + 16, true);
  1386. #if JUCE_ARM
  1387. ValueType* const data1 = buffer1;
  1388. ValueType* const data2 = buffer2;
  1389. int* const int1 = buffer3;
  1390. #else
  1391. // These tests deliberately operate on misaligned memory and will be flagged up by
  1392. // checks for undefined behavior!
  1393. ValueType* const data1 = addBytesToPointer (buffer1.get(), random.nextInt (16));
  1394. ValueType* const data2 = addBytesToPointer (buffer2.get(), random.nextInt (16));
  1395. int* const int1 = addBytesToPointer (buffer3.get(), random.nextInt (16));
  1396. #endif
  1397. fillRandomly (random, data1, num);
  1398. fillRandomly (random, data2, num);
  // Min/max results are compared with the non-vectorised juce implementations.
  1399. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  1400. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  1401. u.expect (minMax1 == minMax2);
  1402. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  1403. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  1404. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  1405. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  // From here on each buffer holds one constant value, so every result is exact.
  1406. FloatVectorOperations::clear (data1, num);
  1407. u.expect (areAllValuesEqual (data1, num, 0));
  1408. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  1409. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  1410. FloatVectorOperations::add (data1, (ValueType) 2, num);
  1411. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  1412. FloatVectorOperations::copy (data2, data1, num);
  1413. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  1414. FloatVectorOperations::add (data2, data1, num);
  1415. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  1416. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  1417. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  1418. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  1419. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  1420. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  1421. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  1422. FloatVectorOperations::multiply (data1, data2, num);
  1423. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  1424. FloatVectorOperations::negate (data2, data1, num);
  1425. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  1426. FloatVectorOperations::subtract (data1, data2, num);
  1427. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  1428. FloatVectorOperations::abs (data1, data2, num);
  1429. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  1430. FloatVectorOperations::abs (data2, data1, num);
  1431. u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
  1432. fillRandomly (random, int1, num);
  1433. doConversionTest (u, data1, data2, int1, num);
  // dest aliases src1 here: 2 + 2 * 3 = 8.
  1434. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  1435. FloatVectorOperations::fill (data2, (ValueType) 3, num);
  1436. FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
  1437. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  1438. }
  // Float only: compares convertFixedToFloat (scale 2.0f) with the scalar convertFixed reference.
  1439. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  1440. {
  1441. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  1442. convertFixed (data2, int1, 2.0f, num);
  1443. u.expect (buffersMatch (data1, data2, num));
  1444. }
  // Double overload: intentionally empty, so no conversion check runs for double.
  1445. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  // Fills d with num random values in [0, 1000) derived from random.nextDouble().
  1446. static void fillRandomly (Random& random, ValueType* d, int num)
  1447. {
  1448. while (--num >= 0)
  1449. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  1450. }
  // Fills d with num values from random.nextInt().
  1451. static void fillRandomly (Random& random, int* d, int num)
  1452. {
  1453. while (--num >= 0)
  1454. *d++ = random.nextInt();
  1455. }
  // Scalar reference for convertFixedToFloat: d[i] = (float) s[i] * multiplier.
  1456. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  1457. {
  1458. while (--num >= 0)
  1459. *d++ = (float) *s++ * multiplier;
  1460. }
  // True when all num elements compare exactly equal (==) to target.
  1461. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  1462. {
  1463. while (--num >= 0)
  1464. if (*d++ != target)
  1465. return false;
  1466. return true;
  1467. }
  // True when every pair of elements satisfies valuesMatch.
  1468. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  1469. {
  1470. while (--num >= 0)
  1471. if (! valuesMatch (*d1++, *d2++))
  1472. return false;
  1473. return true;
  1474. }
  // Absolute-difference comparison with a tolerance of numeric_limits<ValueType>::epsilon().
  1475. static bool valuesMatch (ValueType v1, ValueType v2)
  1476. {
  1477. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  1478. }
  1479. };
  1480. void runTest() override
  1481. {
  1482. beginTest ("FloatVectorOperations");
  1483. for (int i = 1000; --i >= 0;)
  1484. {
  1485. TestRunner<float>::runTest (*this, getRandom());
  1486. TestRunner<double>::runTest (*this, getRandom());
  1487. }
  1488. }
  1489. };
  // File-scope instance of the test class.
  // NOTE(review): presumably constructing it is what makes the test visible to
  // the unit-test framework - confirm against the UnitTest base class.
  static FloatVectorOperationsTests vectorOpTests;
  1491. #endif
  1492. } // namespace juce